<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>arXiv 每日论文精选</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <link href="https://cdn.jsdelivr.net/npm/font-awesome@4.7.0/css/font-awesome.min.css" rel="stylesheet">
    <link rel="stylesheet" href="static/styles.css?v=1763439853">
    <script src="static/tailwind.config.js"></script>

    <style>
        /* 分级折叠功能样式 */
        .collapsed-level-1 .paper-details {
            display: none;
        }
        
        .collapsed-level-2 {
            display: none !important;
        }
        
        /* 展开/折叠图标样式 */
        .expand-icon {
            display: inline-block;
            width: 20px;
            text-align: center;
            margin-right: 5px;
        }
        
        /* 展开/折叠按钮样式 */
        .expand-toggle {
            cursor: pointer;
            padding: 8px 12px;
            background-color: #f3f4f6;
            border: 1px solid #e5e7eb;
            border-radius: 6px;
            margin-bottom: 16px;
            text-align: center;
            font-weight: 500;
            color: #4b5563;
            transition: all 0.2s ease;
        }
        
        .expand-toggle:hover {
            background-color: #e5e7eb;
        }
        
        /* 分割线样式 */
        .papers-divider {
            height: 1px;
            background-color: #e5e7eb;
            margin: 20px 0;
            position: relative;
        }
        
        .papers-divider-label {
            position: absolute;
            left: 50%;
            top: 50%;
            transform: translate(-50%, -50%);
            background-color: white;
            padding: 0 12px;
            color: #9ca3af;
            font-size: 14px;
            cursor: pointer;
        }
        
        .papers-divider-label:hover {
            color: #4b5563;
        }
        
        /* 展开后的样式 */
        .expanded-all .collapsed-level-1 .paper-details,
        .expanded-all .collapsed-level-2 {
            display: block;
        }
        
        .expanded-level-2 .collapsed-level-2 {
            display: block;
        }
    </style>
    </head>

<body class="bg-gray-50 font-sans text-dark">
    <!-- 顶部导航与统计信息合并 -->
    <header class="bg-white shadow-sm sticky top-0 z-10 border-b border-gray-200">
        <div class="container mx-auto px-4 py-4">
            <div class="flex flex-col md:flex-row justify-between items-start md:items-center mb-3">
                <div class="flex items-center">
                    <i class="fa fa-book text-primary text-xl mr-2"></i>
                    <h1 class="text-lg md:text-xl font-bold text-gray-800">arXiv 每日论文精选</h1>
                </div>
                <div class="flex items-center mt-2 md:mt-0">
                    <span id="current-date" class="text-gray-600 text-sm">
                        <i class="fa fa-calendar-o mr-1"></i>2025-11-18
                    </span>
                    <div class="ml-3 relative" id="date-picker-container">
                        <button id="date-picker-toggle" class="bg-light border border-gray-300 text-gray-700 py-1 px-3 pr-6 rounded text-sm leading-tight focus:outline-none focus:bg-white inline-flex items-center">
                            <i class="fa fa-calendar mr-2"></i>
                            <span id="selected-date-text">2025-11-18</span>
                            <i class="fa fa-chevron-down ml-2 text-xs"></i>
                        </button>
                        <div id="date-picker" class="hidden absolute right-0 mt-1 bg-white border border-gray-300 rounded shadow-lg p-2 z-20 w-56">
                            <div class="flex justify-between items-center mb-2">
                                <button id="prev-month" class="text-gray-500 hover:text-gray-700"><i class="fa fa-chevron-left"></i></button>
                                <h4 id="current-month">2025-11-18</h4>
                                <button id="next-month" class="text-gray-500 hover:text-gray-700"><i class="fa fa-chevron-right"></i></button>
                            </div>
                            <div class="grid grid-cols-7 gap-1 text-center text-xs mb-1">
                                <div class="text-gray-500">日</div>
                                <div class="text-gray-500">一</div>
                                <div class="text-gray-500">二</div>
                                <div class="text-gray-500">三</div>
                                <div class="text-gray-500">四</div>
                                <div class="text-gray-500">五</div>
                                <div class="text-gray-500">六</div>
                            </div>
                            <div id="calendar-grid" class="grid grid-cols-7 gap-1 text-center text-sm">
                                <!-- 日历格子将通过JavaScript动态生成 -->
                            </div>
                        </div>
                    </div>
                </div>
            </div>

            <!-- 统计信息 -->
            <div class="flex flex-wrap gap-4 text-sm">
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-file-text-o"></i> 总论文数:</span>
                    <span id="total-papers" class="font-semibold text-primary">159</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-star"></i> 精选论文数:</span>
                    <span id="selected-papers" class="font-semibold text-accent">20</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-line-chart"></i> 平均评分:</span>
                    <span id="avg-score" class="font-semibold text-secondary">2.3</span>
                </div>
            </div>
        </div>
    </header>

    <!-- 主内容区 -->
    <main class="container mx-auto px-4 py-5">
        <!-- 筛选器 -->
        <div class="mb-4 flex flex-col sm:flex-row justify-between items-start sm:items-center">
            <div class="text-gray-700 text-sm mb-2 sm:mb-0">
                <span id="display-count" class="font-medium">显示 159 篇论文 (共 159 篇)</span>
            </div>
            <div class="flex space-x-2">
                <button id="show-all"
                    class="px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors">
                    全部论文
                </button>
                <button id="show-selected"
                    class="px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors">
                    仅显示精选
                </button>
            </div>
        </div>

        <!-- 论文列表 -->
        <div id="papers-container" class="grid grid-cols-1 gap-4">
            
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13201v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>Cog-RAG：基于认知启发的双超图与主题对齐的检索增强生成
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Cog-RAG: Cognitive-Inspired Dual-Hypergraph with Theme Alignment Retrieval-Augmented Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hao Hu, Yifan Feng, Ruoxue Li, Rundong Xue, Xingliang Hou, Zhiqiang Tian, Yue Ga...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何提升检索增强生成中多实体高阶语义关系建模和全局主题对齐的问题，核心方法是构建主题超图和实体超图的双超图框架，并采用认知启发的两阶段检索策略实现从全局主题到局部细节的语义对齐生成。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对RAG系统进行核心架构创新，提出双超图结构和认知启发检索策略，属于LLM应用和Transformer技术的前沿研究，与推荐搜索领域高度相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:10:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13201v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13201v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Retrieval-Augmented Generation (RAG) enhances the response quality and domain-specific performance of large language models (LLMs) by incorporating external knowledge to combat hallucinations. In recent research, graph structures have been integrated into RAG to enhance the capture of semantic relations between entities. However, it primarily focuses on low-order pairwise entity relations, limiting the high-order associations among multiple entities. Hypergraph-enhanced approaches address this limitation by modeling multi-entity interactions via hyperedges, but they are typically constrained to inter-chunk entity-level representations, overlooking the global thematic organization and alignment across chunks. Drawing inspiration from the top-down cognitive process of human reasoning, we propose a theme-aligned dual-hypergraph RAG framework (Cog-RAG) that uses a theme hypergraph to capture inter-chunk thematic structure and an entity hypergraph to model high-order semantic relations. Furthermore, we design a cognitive-inspired two-stage retrieval strategy that first activates query-relevant thematic content from the theme hypergraph, and then guides fine-grained recall and diffusion in the entity hypergraph, achieving semantic alignment and consistent generation from global themes to local details. Our extensive experiments demonstrate that Cog-RAG significantly outperforms existing state-of-the-art baseline approaches.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13041v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>通过表示学习中的组对齐和全局一致性缓解推荐偏见
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Mitigating Recommendation Biases via Group-Alignment and Global-Uniformity in Representation Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Miaomiao Cai, Min Hou, Lei Chen, Le Wu, Haoyue Bai, Yong Li, Meng Wang
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究推荐系统中因训练数据不平衡导致的流行度偏差问题，核心思想是通过表示空间中的组对齐和全局均匀性正则化来缓解偏差，其中组对齐使长尾实体表示分布接近流行实体，全局均匀性保持实体信息完整性。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对推荐系统中的核心偏差问题，提出基于表示分布的正则化方法，属于推荐系统领域的核心进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 06:42:29
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13041v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13041v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Collaborative Filtering~(CF) plays a crucial role in modern recommender systems, leveraging historical user-item interactions to provide personalized suggestions. However, CF-based methods often encounter biases due to imbalances in training data. This phenomenon makes CF-based methods tend to prioritize recommending popular items and performing unsatisfactorily on inactive users. Existing works address this issue by rebalancing training samples, reranking recommendation results, or making the modeling process robust to the bias. Despite their effectiveness, these approaches can compromise accuracy or be sensitive to weighting strategies, making them challenging to train. In this paper, we deeply analyze the causes and effects of the biases and propose a framework to alleviate biases in recommendation from the perspective of representation distribution, namely Group-Alignment and Global-Uniformity Enhanced Representation Learning for Debiasing Recommendation (AURL). Specifically, we identify two significant problems in the representation distribution of users and items, namely group-discrepancy and global-collapse. These two problems directly lead to biases in the recommendation results. To this end, we propose two simple but effective regularizers in the representation space, respectively named group-alignment and global-uniformity. The goal of group-alignment is to bring the representation distribution of long-tail entities closer to that of popular entities, while global-uniformity aims to preserve the information of entities as much as possible by evenly distributing representations. Our method directly optimizes both the group-alignment and global-uniformity regularization terms to mitigate recommendation biases. Extensive experiments on three real datasets and various recommendation backbones verify the superiority of our proposed framework.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12922v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>一次分词，随处推荐：面向多领域LLM推荐系统的统一物品分词方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Tokenize Once, Recommend Anywhere: Unified Item Tokenization for Multi-domain LLM-based Recommendation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yu Hou, Won-Yong Shin
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究多领域LLM推荐系统中物品tokenization的统一建模问题，核心思想是设计混合专家架构，通过共享编码器、领域特定专家和共享专家，在统一潜在空间中实现跨领域语义信息的保留与平衡。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对多领域推荐系统的统一建模问题，结合MoE架构和统一tokenization方法，在LLM推荐系统的核心挑战上提供了创新解决方案。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 03:18:04
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12922v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12922v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.NE</span><span class="category-tag">cs.SI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language model (LLM)-based recommender systems have achieved high-quality performance by bridging the discrepancy between the item space and the language space through item tokenization. However, existing item tokenization methods typically require training separate models for each item domain, limiting generalization. Moreover, the diverse distributions and semantics across item domains make it difficult to construct a unified tokenization that preserves domain-specific information. To address these challenges, we propose UniTok, a Unified item Tokenization framework that integrates our own mixture-of-experts (MoE) architecture with a series of codebooks to convert items into discrete tokens, enabling scalable tokenization while preserving semantic information across multiple item domains. Specifically, items from different domains are first projected into a unified latent space through a shared encoder. They are then routed to domain-specific experts to capture the unique semantics, while a shared expert, which is always active, encodes common knowledge transferable across domains. Additionally, to mitigate semantic imbalance across domains, we present a mutual information calibration mechanism, which guides the model towards retaining similar levels of semantic information for each domain. Comprehensive experiments on wide-ranging real-world datasets demonstrate that the proposed UniTok framework is (a) highly effective: achieving up to 51.89% improvements over strong benchmarks, (b) theoretically sound: showing the analytical validity of our architectural design and optimization; and (c) highly generalizable: demonstrating robust performance across diverse domains without requiring per-domain retraining, a capability not supported by existing baselines.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13254v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>Souper-Model：简单算术如何解锁最先进大语言模型性能
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Souper-Model: How Simple Arithmetic Unlocks State-of-the-Art LLM Performance
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shalini Maiti, Amar Budhiraja, Bhavul Gauri, Gaurav Chaurasia, Anton Protopopov,...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何通过模型权重融合提升LLM性能，核心思想是利用基准测试类别间的低相关性识别专家模型，并通过非均匀加权平均而非传统均匀平均来组合这些专家模型。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的模型融合方法直接应用于LLM性能提升，属于核心LLM技术进展，对搜索和推荐系统的模型优化具有直接应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:13:34
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13254v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13254v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models (LLMs) have demonstrated remarkable capabilities across diverse domains, but their training remains resource- and time-intensive, requiring massive compute power and careful orchestration of training procedures. Model souping-the practice of averaging weights from multiple models of the same architecture-has emerged as a promising pre- and post-training technique that can enhance performance without expensive retraining. In this paper, we introduce Soup Of Category Experts (SoCE), a principled approach for model souping that utilizes benchmark composition to identify optimal model candidates and applies non-uniform weighted averaging to maximize performance. Contrary to previous uniform-averaging approaches, our method leverages the observation that benchmark categories often exhibit low inter-correlations in model performance. SoCE identifies "expert" models for each weakly-correlated category cluster and combines them using optimized weighted averaging rather than uniform weights. We demonstrate that the proposed method improves performance and robustness across multiple domains, including multilingual capabilities, tool calling, and math and achieves state-of-the-art results on the Berkeley Function Calling Leaderboard.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13415v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>基于注意力机制增强的视觉文档检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Attention Grounded Enhancement for Visual Document Retrieval
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Wanqing Cui, Wei Huang, Yazhi Guo, Yibo Hu, Meiguang Jin, Junfeng Ma, Keping Bi
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究视觉文档检索中检索器依赖表层线索而难以捕捉隐式语义连接的问题，核心思想是利用多模态大语言模型的跨模态注意力作为代理局部监督，指导识别相关文档区域，结合全局信号联合优化检索器。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的注意力引导增强框架直接针对检索系统的核心问题，通过跨模态注意力提供局部监督信号，与推荐和搜索中的细粒度匹配高度相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:28:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13415v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13415v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Visual document retrieval requires understanding heterogeneous and multi-modal content to satisfy information needs. Recent advances use screenshot-based document encoding with fine-grained late interaction, significantly improving retrieval performance. However, retrievers are still trained with coarse global relevance labels, without revealing which regions support the match. As a result, retrievers tend to rely on surface-level cues and struggle to capture implicit semantic connections, hindering their ability to handle non-extractive queries. To alleviate this problem, we propose a \textbf{A}ttention-\textbf{G}rounded \textbf{RE}triever \textbf{E}nhancement (AGREE) framework. AGREE leverages cross-modal attention from multimodal large language models as proxy local supervision to guide the identification of relevant document regions. During training, AGREE combines local signals with the global signals to jointly optimize the retriever, enabling it to learn not only whether documents match, but also which content drives relevance. Experiments on the challenging ViDoRe V2 benchmark show that AGREE significantly outperforms the global-supervision-only baseline. Quantitative and qualitative analyses further demonstrate that AGREE promotes deeper alignment between query terms and document regions, moving beyond surface-level matching toward more accurate and interpretable retrieval. Our code is available at: https://anonymous.4open.science/r/AGREE-2025.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13189v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>大语言模型遇上极端多标签分类：扩展与多模态框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Large Language Models Meet Extreme Multi-label Classification: Scaling and Multi-modal Framework
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Diego Ortego, Marlon Rodríguez, Mario Almagro, Kunal Dahiya, David Jiménez, Juan...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何将解码器大语言模型和视觉信息有效整合到极端多标签分类中，核心思想是通过高效嵌入池化实现多模态融合，在保持计算效率的同时利用视觉信息增强分类能力。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文探索解码器大模型在极端多标签分类中的应用，并引入视觉模态增强，与推荐系统的多模态建模和高效扩展高度相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:52:53
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13189v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13189v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Foundation models have revolutionized artificial intelligence across numerous domains, yet their transformative potential remains largely untapped in Extreme Multi-label Classification (XMC). Queries in XMC are associated with relevant labels from extremely large label spaces, where it is critical to strike a balance between efficiency and performance. Therefore, many recent approaches efficiently pose XMC as a maximum inner product search between embeddings learned from small encoder-only transformer architectures. In this paper, we address two important aspects in XMC: how to effectively harness larger decoder-only models, and how to exploit visual information while maintaining computational efficiency. We demonstrate that both play a critical role in XMC separately and can be combined for improved performance. We show that a few billion-size decoder can deliver substantial improvements while keeping computational overhead manageable. Furthermore, our Vision-enhanced eXtreme Multi-label Learning framework (ViXML) efficiently integrates foundation vision models by pooling a single embedding per image. This limits computational growth while unlocking multi-modal capabilities. Remarkably, ViXML with small encoders outperforms text-only decoder in most cases, showing that an image is worth billions of parameters. Finally, we present an extension of existing text-only datasets to exploit visual metadata and make them available for future benchmarking. Comprehensive experiments across four public text-only datasets and their corresponding image enhanced versions validate our proposals' effectiveness, surpassing previous state-of-the-art by up to +8.21\% in P@1 on the largest dataset. ViXML's code is available at https://github.com/DiegoOrtego/vixml.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12949v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>我们能否预测下一个问题？一种基于协同过滤的用户行为建模方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Can We Predict the Next Question? A Collaborative Filtering Approach to Modeling User Behavior
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Bokang Fu, Jiahao Wang, Xiaojing Liu, Yuli Liu
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何预测用户的下一个问题，核心方法是提出协同过滤增强的问题预测框架，通过个性化记忆模块和图基偏好传播动态建模用户-问题交互序列。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对推荐系统中的动态用户行为建模问题，提出结合协同过滤与序列预测的方法，与核心领域进展和直接LLM应用高度相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 04:01:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12949v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12949v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In recent years, large language models (LLMs) have excelled in language understanding and generation, powering advanced dialogue and recommendation systems. However, a significant limitation persists: these systems often model user preferences statically, failing to capture the dynamic and sequential nature of interactive behaviors. The sequence of a user's historical questions provides a rich, implicit signal of evolving interests and cognitive patterns, yet leveraging this temporal data for predictive tasks remains challenging due to the inherent disconnect between language modeling and behavioral sequence modeling. To bridge this gap, we propose a Collaborative Filtering-enhanced Question Prediction (CFQP) framework. CFQP dynamically models evolving user-question interactions by integrating personalized memory modules with graph-based preference propagation. This dual mechanism allows the system to adaptively learn from user-specific histories while refining predictions through collaborative signals from similar users. Experimental results demonstrate that our approach effectively generates agents that mimic real-user questioning patterns, highlighting its potential for building proactive and adaptive dialogue systems.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12947v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>一种即插即用的空间约束表示增强框架用于本地生活推荐
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Plug-and-Play Spatially-Constrained Representation Enhancement Framework for Local-Life Recommendation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hao Jiang, Guoquan Wang, Sheng Yu, Yang Zeng, Wencong Zeng, Guorui Zhou
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究本地生活推荐中因空间约束导致的长尾物品曝光不足问题，核心思想是从物品中心视角出发，通过元ID预热和空间约束对比学习来增强长尾物品表示，同时保持与热门物品的兼容性。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对推荐系统的核心挑战——长尾稀疏性和空间约束，提出了一种新颖的表示增强框架，属于推荐系统领域的核心进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 03:58:04
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12947v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12947v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Local-life recommendation have witnessed rapid growth, providing users with convenient access to daily essentials. However, this domain faces two key challenges: (1) spatial constraints, driven by the requirements of the local-life scenario, where items are usually shown only to users within a limited geographic area, indirectly reducing their exposure probability; and (2) long-tail sparsity, where few popular items dominate user interactions, while many high-quality long-tail items are largely overlooked due to imbalanced interaction opportunities. Existing methods typically adopt a user-centric perspective, such as modeling spatial user preferences or enhancing long-tail representations with collaborative filtering signals. However, we argue that an item-centric perspective is more suitable for this domain, focusing on enhancing long-tail items representation that align with the spatially-constrained characteristics of local lifestyle services. To tackle this issue, we propose ReST, a Plug-And-Play Spatially-Constrained Representation Enhancement Framework for Long-Tail Local-Life Recommendation. Specifically, we first introduce a Meta ID Warm-up Network, which initializes fundamental ID representations by injecting their basic attribute-level semantic information. Subsequently, we propose a novel Spatially-Constrained ID Representation Enhancement Network (SIDENet) based on contrastive learning, which incorporates two efficient strategies: a spatially-constrained hard sampling strategy and a dynamic representation alignment strategy. This design adaptively identifies weak ID representations based on their attribute-level information during training. It additionally enhances them by capturing latent item relationships within the spatially-constrained characteristics of local lifestyle services, while preserving compatibility with popular items.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12934v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>AIF：用于成本效益预排序的异步推理框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AIF: Asynchronous Inference Framework for Cost-Effective Pre-Ranking
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhi Kou, Xiang-Rong Sheng, Shuguang Han, Zhishan Zhao, Yueyao Cheng, Han Zhu, Ji...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究工业推荐系统中预排序模型的效率瓶颈问题，核心思想是通过解耦用户侧和物品侧计算，采用异步并行执行框架来提升系统性能。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出异步推理框架解决推荐系统预排序阶段的效率和延迟瓶颈，直接针对工业推荐系统的核心架构优化问题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 03:39:32
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12934v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12934v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In industrial recommendation systems, pre-ranking models based on deep neural networks (DNNs) commonly adopt a sequential execution framework: feature fetching and model forward computation are triggered only after receiving candidates from the upstream retrieval stage. This design introduces inherent bottlenecks, including redundant computations of identical users/items and increased latency due to strictly sequential operations, which jointly constrain the model's capacity and system efficiency. To address these limitations, we propose the Asynchronous Inference Framework (AIF), a cost-effective computational architecture that decouples interaction-independent components, those operating within a single user or item, from real-time prediction. AIF reorganizes the model inference process by performing user-side computations in parallel with the retrieval stage and conducting item-side computations in a nearline manner. This means that interaction-independent components are calculated just once and completed before the real-time prediction phase of the pre-ranking stage. As a result, AIF enhances computational efficiency and reduces latency, freeing up resources to significantly improve the feature set and model architecture of interaction-independent components. Moreover, we delve into model design within the AIF framework, employing approximated methods for interaction-dependent components in online real-time predictions. By co-designing both the framework and the model, our solution achieves notable performance gains without significantly increasing computational and latency costs. This has enabled the successful deployment of AIF in the Taobao display advertising system.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13593v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>面向个性化、长视野、自演进智能体的全记忆系统
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Omni Memory System for Personalized, Long Horizon, Self-Evolving Agents
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Piaohong Wang, Motong Tian, Jiaxian Li, Yuan Liang, Yuqing Wang, Qianben Chen, T...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM智能体在复杂环境中长期交互的上下文一致性和动态个性化问题，核心方法是基于主动用户画像构建记忆框架，动态提取用户特征和事件记录，并支持人物属性和主题相关上下文的层次化检索。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的主动用户画像和层次化记忆检索机制直接适用于个性化推荐系统，其处理异构用户交互数据的方法与多模态建模理念相通。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:55:19
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13593v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13593v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advancements in LLM-powered agents have demonstrated significant potential in generating human-like responses; however, they continue to face challenges in maintaining long-term interactions within complex environments, primarily due to limitations in contextual consistency and dynamic personalization. Existing memory systems often depend on semantic grouping prior to retrieval, which can overlook semantically irrelevant yet critical user information and introduce retrieval noise. In this report, we propose the initial design of O-Mem, a novel memory framework based on active user profiling that dynamically extracts and updates user characteristics and event records from their proactive interactions with agents. O-Mem supports hierarchical retrieval of persona attributes and topic-related context, enabling more adaptive and coherent personalized responses. O-Mem achieves 51.76% on the public LoCoMo benchmark, a nearly 3% improvement upon LangMem,the previous state-of-the-art, and it achieves 62.99% on PERSONAMEM, a 3.5% improvement upon A-Mem,the previous state-of-the-art. O-Mem also boosts token and interaction response time efficiency compared to previous memory frameworks. Our work opens up promising directions for developing efficient and human-like personalized AI assistants in the future.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13410v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>Mem-PAL：面向基于记忆的个性化对话助手，用于长期用户-代理交互
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Mem-PAL: Towards Memory-based Personalized Dialogue Assistants for Long-term User-Agent Interaction
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhaopei Huang, Qifeng Dai, Guozheng Wu, Xiaopeng Wu, Kehan Chen, Chuan Yu, Xubin...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究长期用户-智能体交互中个性化对话助手的核心问题，提出分层异构记忆框架，通过检索增强生成技术整合用户历史对话和特征来提升个性化服务能力。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出基于记忆的个性化对话框架，直接应用于推荐系统相关的用户交互场景，并采用检索增强生成技术提升个性化响应能力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:22:32
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13410v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13410v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the rise of smart personal devices, service-oriented human-agent interactions have become increasingly prevalent. This trend highlights the need for personalized dialogue assistants that can understand user-specific traits to accurately interpret requirements and tailor responses to individual preferences. However, existing approaches often overlook the complexities of long-term interactions and fail to capture users' subjective characteristics. To address these gaps, we present PAL-Bench, a new benchmark designed to evaluate the personalization capabilities of service-oriented assistants in long-term user-agent interactions. In the absence of available real-world data, we develop a multi-step LLM-based synthesis pipeline, which is further verified and refined by human annotators. This process yields PAL-Set, the first Chinese dataset comprising multi-session user logs and dialogue histories, which serves as the foundation for PAL-Bench. Furthermore, to improve personalized service-oriented interactions, we propose H$^2$Memory, a hierarchical and heterogeneous memory framework that incorporates retrieval-augmented generation to improve personalized response generation. Comprehensive experiments on both our PAL-Bench and an external dataset demonstrate the effectiveness of the proposed memory framework.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13368v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>捐赠者与接收者：基于参数高效微调的任务与语言间非对称迁移研究
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Donors and Recipients: On Asymmetric Transfer Across Tasks and Languages with Parameter-Efficient Fine-Tuning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kajetan Dymkiewicz, Ivan Vulic, Helen Yannakoudakis, Eilam Shapira, Roi Reichart...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究多语言大模型在任务和语言维度上的知识迁移规律，核心发现是存在任务匹配优先和捐赠者-接收者结构的不对称迁移模式。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文系统研究LLM跨任务和跨语言的知识迁移模式，直接关联参数高效微调技术在多语言推荐搜索系统中的实际应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:41:31
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13368v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13368v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models (LLMs) perform strongly across tasks and languages, yet how improvements in one task or language affect other tasks and languages and their combinations remains poorly understood. We conduct a controlled PEFT/LoRA study across multiple open-weight LLM families and sizes, treating task and language as transfer axes while conditioning on model family and size; we fine-tune each model on a single task-language source and measure transfer as the percentage-point change versus its baseline score when evaluated on all other task-language target pairs. We decompose transfer into (i) Matched-Task (Cross-Language), (ii) Matched-Language (Cross-Task), and (iii) Cross-Task (Cross-Language) regimes. We uncover two consistent general patterns. First, a pronounced on-task vs. off-task asymmetry: Matched-Task (Cross-Language) transfer is reliably positive, whereas off-task transfer often incurs collateral degradation. Second, a stable donor-recipient structure across languages and tasks (hub donors vs. brittle recipients). We outline implications for risk-aware fine-tuning and model specialisation.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12997v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>WebCoach：具有跨会话记忆引导的自演进网络智能体
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            WebCoach: Self-Evolving Web Agents with Cross-Session Memory Guidance
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Genglin Liu, Shijie Geng, Sha Li, Hejie Cui, Sarah Zhang, Xin Liu, Tianyi Liu
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究网页浏览智能体无法跨会话学习经验的问题，核心思想是构建包含轨迹标准化、外部记忆存储和实时指导注入的自进化框架，使智能体能够利用历史经验改进长期规划和决策。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的跨会话记忆框架和自进化机制直接适用于推荐系统的长期用户建模和持续学习，其记忆存储与检索架构对处理用户序列数据具有重要参考价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 05:38:50
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12997v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12997v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multimodal LLM-powered agents have recently demonstrated impressive capabilities in web navigation, enabling agents to complete complex browsing tasks across diverse domains. However, current agents struggle with repetitive errors and lack the ability to learn from past experiences across sessions, limiting their long-term robustness and sample efficiency. We introduce WebCoach, a model-agnostic self-evolving framework that equips web browsing agents with persistent cross-session memory, enabling improved long-term planning, reflection, and continual learning without retraining. WebCoach consists of three key components: (1) a WebCondenser, which standardizes raw navigation logs into concise summaries; (2) an External Memory Store, which organizes complete trajectories as episodic experiences; and (3) a Coach, which retrieves relevant experiences based on similarity and recency, and decides whether to inject task-specific advice into the agent via runtime hooks. This design empowers web agents to access long-term memory beyond their native context window, improving robustness in complex browsing tasks. Moreover, WebCoach achieves self-evolution by continuously curating episodic memory from new navigation trajectories, enabling agents to improve over time without retraining. Evaluations on the WebVoyager benchmark demonstrate that WebCoach consistently improves the performance of browser-use agents across three different LLM backbones. With a 38B model, it increases task success rates from 47% to 61% while reducing or maintaining the average number of steps. Notably, smaller base models with WebCoach achieve performance comparable to the same web agent using GPT-4o.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13418v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>通过迭代搜索探索多表检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Exploring Multi-Table Retrieval Through Iterative Search
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Allaa Boutaleb, Bernd Amann, Rafael Angarita, Hubert Naacke
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究多表数据湖中的开放域问答检索问题，核心思想是将多表检索建模为迭代搜索过程，通过贪婪连接感知检索算法在语义相关性、查询覆盖率和表间连接性之间进行整体平衡。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的迭代式多表检索框架直接解决搜索和推荐系统中的核心问题——高效检索和组合异构数据源，其平衡相关性、覆盖率和连接性的思想对大规模推荐系统具有重要借鉴意义。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:31:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13418v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13418v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.DB</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Open-domain question answering over datalakes requires retrieving and composing information from multiple tables, a challenging subtask that demands semantic relevance and structural coherence (e.g., joinability). While exact optimization methods like Mixed-Integer Programming (MIP) can ensure coherence, their computational complexity is often prohibitive. Conversely, simpler greedy heuristics that optimize for query coverage alone often fail to find these coherent, joinable sets. This paper frames multi-table retrieval as an iterative search process, arguing this approach offers advantages in scalability, interpretability, and flexibility. We propose a general framework and a concrete instantiation: a fast, effective Greedy Join-Aware Retrieval algorithm that holistically balances relevance, coverage, and joinability. Experiments across 5 NL2SQL benchmarks demonstrate that our iterative method achieves competitive retrieval performance compared to the MIP-based approach while being 4-400x faster depending on the benchmark and search space settings. This work highlights the potential of iterative heuristics for practical, scalable, and composition-aware retrieval.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13057v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>维度与精度：基于BEIR SciFact的高效向量检索中自编码器与量化方法的对比分析
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Dimension vs. Precision: A Comparative Analysis of Autoencoders and Quantization for Efficient Vector Retrieval on BEIR SciFact
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Satyanarayan Pati
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究高维浮点向量在密集检索中的存储效率问题，核心方法是通过系统比较维度缩减（自编码器）和精度量化两种压缩策略的性能损失与压缩比权衡。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接研究检索系统的效率优化，这是搜索和推荐领域的核心工程问题，虽然不涉及LLM技术，但对实际部署具有重要指导意义。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:02:11
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13057v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13057v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Dense retrieval models have become a standard for state-of-the-art information retrieval. However, their high-dimensional, high-precision (float32) vector embeddings create significant storage and memory challenges for real-world deployment. To address this, we conduct a rigorous empirical study on the BEIR SciFact benchmark, evaluating the trade-offs between two primary compression strategies: (1) Dimensionality Reduction via deep Autoencoders (AE), reducing original 384-dim vectors to latent spaces from 384 down to 12, and (2) Precision Reduction via Quantization (float16, int8, and binary). We systematically compare each method by measuring the "performance loss" (or gain) relative to a float32 baseline across a full suite of retrieval metrics (NDCG, MAP, MRR, Recall, Precision) at various k cutoffs. Our results show that int8 scalar quantization provides the most effective "sweet spot," achieving a 4x compression with a negligible [~1-2%] drop in nDCG@10. In contrast, Autoencoders show a graceful degradation but suffer a more significant performance loss at equivalent 4x compression ratios (AE-96). binary quantization was found to be unsuitable for this task due to catastrophic performance drops. This work provides a practical guide for deploying efficient, high-performance retrieval systems.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13719v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>基于多模态基础模型扩展空间智能
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Scaling Spatial Intelligence with Multimodal Foundation Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhongang Cai, Ruisi Wang, Chenyang Gu, Fanyi Pu, Junxiang Xu, Yubo Wang, Wanqi Y...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究多模态基础模型在空间智能方面的不足问题，核心方法是构建大规模多样化空间能力数据集来系统培养模型的空间理解能力。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过多模态基础模型处理空间智能问题，与VLM异构数据建模理念高度相关，但主要聚焦视觉空间而非推荐系统领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:59:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13719v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13719v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.MM</span><span class="category-tag">cs.RO</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Despite remarkable progress, multimodal foundation models still exhibit surprising deficiencies in spatial intelligence. In this work, we explore scaling up multimodal foundation models to cultivate spatial intelligence within the SenseNova-SI family, built upon established multimodal foundations including visual understanding models (i.e., Qwen3-VL and InternVL3) and unified understanding and generation models (i.e., Bagel). We take a principled approach to constructing high-performing and robust spatial intelligence by systematically curating SenseNova-SI-8M: eight million diverse data samples under a rigorous taxonomy of spatial capabilities. SenseNova-SI demonstrates unprecedented performance across a broad range of spatial intelligence benchmarks: 68.7% on VSI-Bench, 43.3% on MMSI, 85.6% on MindCube, 54.6% on ViewSpatial, and 50.1% on SITE, while maintaining strong general multimodal understanding (e.g., 84.9% on MMBench-En). More importantly, we analyze the impact of data scaling, discuss early signs of emergent generalization capabilities enabled by diverse data training, analyze the risk of overfitting and language shortcuts, present a preliminary study on spatial chain-of-thought reasoning, and validate the potential downstream application. SenseNova-SI is an ongoing project, and this report will be updated continuously. All newly trained multimodal foundation models are publicly released to facilitate further research in this direction.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13679v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>QUILL：面向缓存局部可变形注意力机制的算法-架构协同设计
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            QUILL: An Algorithm-Architecture Co-Design for Cache-Local Deformable Attention
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hyunwoo Oh, Hanning Chen, Sanggeon Yun, Yang Ni, Wenjun Huang, Tamoghno Das, Suy...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究可变形Transformer在硬件映射时的内存访问效率问题，核心思想是通过距离感知的乱序查询调度和缓存友好的单次执行架构，将稀疏性转化为局部性来提升计算效率。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于Transformer架构的效率优化，通过算法-架构协同设计解决可变形注意力机制的内存访问问题，直接关联Transformer技术进展领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:34:04
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13679v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13679v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AR</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Deformable transformers deliver state-of-the-art detection but map poorly to hardware due to irregular memory access and low arithmetic intensity. We introduce QUILL, a schedule-aware accelerator that turns deformable attention into cache-friendly, single-pass work. At its core, Distance-based Out-of-Order Querying (DOOQ) orders queries by spatial proximity; the look-ahead drives a region prefetch into an alternate buffer--forming a schedule-aware prefetch loop that overlaps memory and compute. A fused MSDeformAttn engine executes interpolation, Softmax, aggregation, and the final projection (W''m) in one pass without spilling intermediates, while small tensors are kept on-chip and surrounding dense layers run on integrated GEMMs. Implemented as RTL and evaluated end-to-end, QUILL achieves up to 7.29x higher throughput and 47.3x better energy efficiency than an RTX 4090, and exceeds prior accelerators by 3.26-9.82x in throughput and 2.01-6.07x in energy efficiency. With mixed-precision quantization, accuracy tracks FP32 within <=0.9 AP across Deformable and Sparse DETR variants. By converting sparsity into locality--and locality into utilization--QUILL delivers consistent, end-to-end speedups.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13283v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>TabFlash：通过渐进式问题条件化与令牌聚焦实现高效表格理解
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TabFlash: Efficient Table Understanding with Progressive Question Conditioning and Token Focusing
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jongha Kim, Minseong Bae, Sanghyeok Lee, Jinsung Yoon, Hyunwoo J. Kim
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究表格图像理解中的效率和冗余问题，核心思想是通过渐进式问题条件化注入问题信息，结合token剪枝和聚焦训练来生成紧凑且信息丰富的视觉特征。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的渐进式问题条件化和token聚焦方法属于Transformer架构效率优化技术，虽然针对表格图像，但其核心思想可迁移到推荐系统的异构数据处理中。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:00:23
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13283v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13283v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Table images present unique challenges for effective and efficient understanding due to the need for question-specific focus and the presence of redundant background regions. Existing Multimodal Large Language Model (MLLM) approaches often overlook these characteristics, resulting in uninformative and redundant visual representations. To address these issues, we aim to generate visual features that are both informative and compact to improve table understanding. We first propose progressive question conditioning, which injects the question into Vision Transformer layers with gradually increasing frequency, considering each layer's capacity to handle additional information, to generate question-aware visual features. To reduce redundancy, we introduce a pruning strategy that discards background tokens, thereby improving efficiency. To mitigate information loss from pruning, we further propose token focusing, a training strategy that encourages the model to concentrate essential information in the retained tokens. By combining these approaches, we present TabFlash, an efficient and effective MLLM for table understanding. TabFlash achieves state-of-the-art performance, outperforming both open-source and proprietary MLLMs, while requiring 27% less FLOPs and 30% less memory usage compared to the second-best MLLM.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13166v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>局部协同过滤：一种利用用户间局部相似性的协同过滤方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>6/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Local Collaborative Filtering: A Collaborative Filtering Method that Utilizes Local Similarities among Users
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhaoxin Shen, Dan Wu
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何更有效地利用用户行为数据改进推荐系统，核心思想是通过局部用户相似性和大数定律整合用户数据，提出局部协同过滤方法。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于传统协同过滤方法的改进，属于推荐系统核心领域，但未涉及LLM或Transformer技术，创新性有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:10:37
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13166v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13166v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.HC</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    To leverage user behavior data from the Internet more effectively in recommender systems, this paper proposes a novel collaborative filtering (CF) method called Local Collaborative Filtering (LCF). LCF utilizes local similarities among users and integrates their data using the law of large numbers (LLN), thereby improving the utilization of user behavior data. Experiments are conducted on the Steam game dataset, and the results of LCF align with real-world needs.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12861v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>从感知到推理：深度思考赋能多模态大语言模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>6/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            From Perception to Reasoning: Deep Thinking Empowers Multimodal Large Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Wenxin Zhu, Andong Chen, Yuchen Song, Kehai Chen, Conghui Zhu, Ziyan Chen, Tieju...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何提升多模态大语言模型的复杂推理能力，核心思想是将语言模型中的思维链推理范式扩展至多模态领域，通过分步推理机制增强模型推理透明度和泛化能力。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文系统综述多模态思维链技术，虽然不直接涉及推荐系统，但其增强模型推理透明度和泛化能力的思想可迁移至搜索和广告领域的复杂决策任务。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 01:22:37
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12861v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12861v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the remarkable success of Multimodal Large Language Models (MLLMs) in perception tasks, enhancing their complex reasoning capabilities has emerged as a critical research focus. Existing models still suffer from challenges such as opaque reasoning paths and insufficient generalization ability. Chain-of-Thought (CoT) reasoning, which has demonstrated significant efficacy in language models by enhancing reasoning transparency and output interpretability, holds promise for improving model reasoning capabilities when extended to the multimodal domain. This paper provides a systematic review centered on "Multimodal Chain-of-Thought" (MCoT). First, it analyzes the background and theoretical motivations for its inception from the perspectives of technical evolution and task demands. Then, it introduces mainstream MCoT methods from three aspects: CoT paradigms, the post-training stage, and the inference stage, while also analyzing their underlying mechanisms. Furthermore, the paper summarizes existing evaluation benchmarks and metrics, and discusses the application scenarios of MCoT. Finally, it analyzes the challenges currently facing MCoT and provides an outlook on its future research directions.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13125v1" target="_blank" rel="noopener noreferrer">
                用于有效轨迹相似性学习的区域-点联合表示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Region-Point Joint Representation for Effective Trajectory Similarity Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hao Long, Silin Zhou, Lisi Chen, Shuo Shang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及轨迹相似性学习，这在基于位置的推荐系统中具有潜在应用。然而，它主要关注轨迹数据的表示学习，与核心推荐系统、搜索或广告的直接关联较弱，且未明确涉及LLM或Transformer技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:28:18
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13125v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13125v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent learning-based methods have reduced the computational complexity of traditional trajectory similarity computation, but state-of-the-art (SOTA) methods still fail to leverage the comprehensive spectrum of trajectory information for similarity modeling. To tackle this problem, we propose \textbf{RePo}, a novel method that jointly encodes \textbf{Re}gion-wise and \textbf{Po}int-wise features to capture both spatial context and fine-grained moving patterns. For region-wise representation, the GPS trajectories are first mapped to grid sequences, and spatial context are captured by structural features and semantic context enriched by visual features. For point-wise representation, three lightweight expert networks extract local, correlation, and continuous movement patterns from dense GPS sequences. Then, a router network adaptively fuses the learned point-wise features, which are subsequently combined with region-wise features using cross-attention to produce the final trajectory embedding. To train RePo, we adopt a contrastive loss with hard negative samples to provide similarity ranking supervision. Experiment results show that RePo achieves an average accuracy improvement of 22.2\% over SOTA baselines across all evaluation metrics.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12920v1" target="_blank" rel="noopener noreferrer">
                审核谷歌AI概览和精选摘要：关于婴儿护理和孕期的案例研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Auditing Google's AI Overviews and Featured Snippets: A Case Study on Baby Care and Pregnancy
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Desheng Hu, Joachim Baumann, Aleksandra Urman, Elsa Lichtenegger, Robin Forsberg...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注AI系统的审计和案例研究，属于评估和验证范畴，与核心推荐系统、搜索或广告的技术进展相关性较弱。虽然涉及搜索功能（精选摘要），但焦点在于审计而非技术改进或新方法开发，因此潜在应用有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 03:16:36
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12920v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12920v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CY</span><span class="category-tag">cs.HC</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Google Search increasingly surfaces AI-generated content through features like AI Overviews (AIO) and Featured Snippets (FS), which users frequently rely on despite having no control over their presentation. Through a systematic algorithm audit of 1,508 real baby care and pregnancy-related queries, we evaluate the quality and consistency of these information displays. Our robust evaluation framework assesses multiple quality dimensions, including answer consistency, relevance, presence of medical safeguards, source categories, and sentiment alignment. Our results reveal concerning gaps in information consistency, with information in AIO and FS displayed on the same search result page being inconsistent with each other in 33% of cases. Despite high relevance scores, both features critically lack medical safeguards (present in just 11% of AIO and 7% of FS responses). While health and wellness websites dominate source categories for both, AIO and FS, FS also often link to commercial sources. These findings have important implications for public health information access and demonstrate the need for stronger quality controls in AI-mediated health information. Our methodology provides a transferable framework for auditing AI systems across high-stakes domains where information quality directly impacts user well-being.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13658v1" target="_blank" rel="noopener noreferrer">
                为何“芝加哥”能预测欺骗性评论？利用大型语言模型从词汇线索中发现语言现象
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Why is "Chicago" Predictive of Deceptive Reviews? Using LLMs to Discover Language Phenomena from Lexical Cues
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiaming Qu, Mengtian Guo, Yue Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及LLMs在文本分析中的应用，但其核心焦点是欺骗性评论检测和语言现象发现，这属于内容可信度/质量评估范畴，而非推荐系统、搜索或广告中的核心排序或建模问题。尽管LLMs作为工具被使用，但论文主题更偏向NLP应用和文本分析，与当前关注的RecSys/Search/Ads核心进展或LLM技术直接应用关联较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:15:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13658v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13658v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Deceptive reviews mislead consumers, harm businesses, and undermine trust in online marketplaces. Machine learning classifiers can learn from large amounts of training examples to effectively distinguish deceptive reviews from genuine ones. However, the distinguishing features learned by these classifiers are often subtle, fragmented, and difficult for humans to interpret. In this work, we explore using large language models (LLMs) to translate machine-learned lexical cues into human-understandable language phenomena that can differentiate deceptive reviews from genuine ones. We show that language phenomena obtained in this manner are empirically grounded in data, generalizable across similar domains, and more predictive than phenomena either in LLMs' prior knowledge or obtained through in-context learning. These language phenomena have the potential to aid people in critically assessing the credibility of online reviews in environments where deception detection classifiers are unavailable.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12874v1" target="_blank" rel="noopener noreferrer">
                基于Transformer模型的文本数据中希望情绪分类
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Classification of Hope in Textual Data using Transformer-Based Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chukwuebuka Fortunate Ijezue, Tania-Amanda Fredrick Eneye, Maaz Amjad
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及Transformer模型在文本情感分类中的应用，虽然使用了Transformer架构，但聚焦于特定情感（希望）的分类，与推荐系统、搜索或广告的核心技术关联较弱。在推荐或广告中，情感分析可作为辅助特征，但论文标题未明确展示其在推荐/搜索/广告中的直接应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 02:07:24
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12874v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12874v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper presents a transformer-based approach for classifying hope expressions in text. We developed and compared three architectures (BERT, GPT-2, and DeBERTa) for both binary classification (Hope vs. Not Hope) and multiclass categorization (five hope-related categories). Our initial BERT implementation achieved 83.65% binary and 74.87% multiclass accuracy. In the extended comparison, BERT demonstrated superior performance (84.49% binary, 72.03% multiclass accuracy) while requiring significantly fewer computational resources (443s vs. 704s training time) than newer architectures. GPT-2 showed lowest overall accuracy (79.34% binary, 71.29% multiclass), while DeBERTa achieved moderate results (80.70% binary, 71.56% multiclass) but at substantially higher computational cost (947s for multiclass training). Error analysis revealed architecture-specific strengths in detecting nuanced hope expressions, with GPT-2 excelling at sarcasm detection (92.46% recall). This study provides a framework for computational analysis of hope, with applications in mental health and social media analysis, while demonstrating that architectural suitability may outweigh model size for specialized emotion detection tasks.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13720v1" target="_blank" rel="noopener noreferrer">
                回归基础：让去噪生成模型专注去噪
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Back to Basics: Let Denoising Generative Models Denoise
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tianhong Li, Kaiming He
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文关注去噪生成模型的基础原理和核心能力，属于生成模型技术范畴。虽然去噪技术在推荐系统和搜索中可用于数据清洗和噪声处理，但论文标题暗示的是回归模型本质功能，而非针对推荐/搜索/广告领域的特定应用，因此相关性有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:59:57
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13720v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13720v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Today's denoising diffusion models do not "denoise" in the classical sense, i.e., they do not directly predict clean images. Rather, the neural networks predict noise or a noised quantity. In this paper, we suggest that predicting clean data and predicting noised quantities are fundamentally different. According to the manifold assumption, natural data should lie on a low-dimensional manifold, whereas noised quantities do not. With this assumption, we advocate for models that directly predict clean data, which allows apparently under-capacity networks to operate effectively in very high-dimensional spaces. We show that simple, large-patch Transformers on pixels can be strong generative models: using no tokenizer, no pre-training, and no extra loss. Our approach is conceptually nothing more than "$\textbf{Just image Transformers}$", or $\textbf{JiT}$, as we call it. We report competitive results using JiT with large patch sizes of 16 and 32 on ImageNet at resolutions of 256 and 512, where predicting high-dimensional noised quantities can fail catastrophically. With our networks mapping back to the basics of the manifold, our research goes back to basics and pursues a self-contained paradigm for Transformer-based diffusion on raw natural data.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13649v1" target="_blank" rel="noopener noreferrer">
                分布匹配蒸馏与强化学习相结合
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Distribution Matching Distillation Meets Reinforcement Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Dengyang Jiang, Dongyang Liu, Zanyi Wang, Qilong Wu, Xin Jin, David Liu, Zhen Li...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及强化学习与知识蒸馏的结合，但未明确说明与推荐系统、搜索或广告的相关性。虽然知识蒸馏技术可能应用于模型压缩以提升推荐系统效率，但强化学习在该标题中占主导地位，而根据用户关注点，纯粹的强化学习论文若无明确的推荐系统/搜索/广告应用场景则相关性较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:59:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13649v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13649v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Distribution Matching Distillation (DMD) distills a pre-trained multi-step diffusion model to a few-step one to improve inference efficiency. However, the performance of the latter is often capped by the former. To circumvent this dilemma, we propose DMDR, a novel framework that combines Reinforcement Learning (RL) techniques into the distillation process. We show that for the RL of the few-step generator, the DMD loss itself is a more effective regularization compared to the traditional ones. In turn, RL can help to guide the mode coverage process in DMD more effectively. These allow us to unlock the capacity of the few-step generator by conducting distillation and RL simultaneously. Meanwhile, we design the dynamic distribution guidance and dynamic renoise sampling training strategies to improve the initial distillation process. The experiments demonstrate that DMDR can achieve leading visual quality, prompt coherence among few-step methods, and even exhibit performance that exceeds the multi-step teacher.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13539v1" target="_blank" rel="noopener noreferrer">
                BootOOD：基于神经坍缩下合成样本暴露的自监督分布外检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            BootOOD: Self-Supervised Out-of-Distribution Detection via Synthetic Sample Exposure under Neural Collapse
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuanchao Wang, Tian Qin, Eduardo Valle, Bruno Abrahao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注分布外检测的自监督方法，这属于模型鲁棒性和泛化性的技术范畴。虽然分布外检测在推荐和搜索系统中对处理未知项目或查询有一定潜在价值，但论文聚焦于神经坍缩理论框架下的合成样本生成，与当前关注的核心推荐系统进展、LLM技术或Transformer架构改进的直接关联较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:12:31
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13539v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13539v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Out-of-distribution (OOD) detection is critical for deploying image classifiers in safety-sensitive environments, yet existing detectors often struggle when OOD samples are semantically similar to the in-distribution (ID) classes. We present BootOOD, a fully self-supervised OOD detection framework that bootstraps exclusively from ID data and is explicitly designed to handle semantically challenging OOD samples. BootOOD synthesizes pseudo-OOD features through simple transformations of ID representations and leverages Neural Collapse (NC), where ID features cluster tightly around class means with consistent feature norms. Unlike prior approaches that aim to constrain OOD features into subspaces orthogonal to the collapsed ID means, BootOOD introduces a lightweight auxiliary head that performs radius-based classification on feature norms. This design decouples OOD detection from the primary classifier and imposes a relaxed requirement: OOD samples are learned to have smaller feature norms than ID features, which is easier to satisfy when ID and OOD are semantically close. Experiments on CIFAR-10, CIFAR-100, and ImageNet-200 show that BootOOD outperforms prior post-hoc methods, surpasses training-based methods without outlier exposure, and is competitive with state-of-the-art outlier-exposure approaches while maintaining or improving ID accuracy.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13478v1" target="_blank" rel="noopener noreferrer">
                语义文档反渲染：基于视觉语言建模的SVG重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Semantic Document Derendering: SVG Reconstruction via Vision-Language Modeling
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Adam Hazimeh, Ke Wang, Mark Collier, Gilles Baechler, Efi Kokiopoulou, Pascal Fr...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及视觉语言模型和文档结构重建，与VLM类比异构数据的概念有一定关联，可能启发将用户行为序列和上下文特征作为不同模态的统一建模方法。然而，其核心焦点是文档反渲染和SVG重建，与推荐系统、搜索或广告的直接应用距离较远，相关性有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:16:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13478v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13478v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multimedia documents such as slide presentations and posters are designed to be interactive and easy to modify. Yet, they are often distributed in a static raster format, which limits editing and customization. Restoring their editability requires converting these raster images back into structured vector formats. However, existing geometric raster-vectorization methods, which rely on low-level primitives like curves and polygons, fall short at this task. Specifically, when applied to complex documents like slides, they fail to preserve the high-level structure, resulting in a flat collection of shapes where the semantic distinction between image and text elements is lost. To overcome this limitation, we address the problem of semantic document derendering by introducing SliDer, a novel framework that uses Vision-Language Models (VLMs) to derender slide images as compact and editable Scalable Vector Graphic (SVG) representations. SliDer detects and extracts attributes from individual image and text elements in a raster input and organizes them into a coherent SVG format. Crucially, the model iteratively refines its predictions during inference in a process analogous to human design, generating SVG code that more faithfully reconstructs the original raster upon rendering. Furthermore, we introduce Slide2SVG, a novel dataset comprising raster-SVG pairs of slide documents curated from real-world scientific presentations, to facilitate future research in this domain. Our results demonstrate that SliDer achieves a reconstruction LPIPS of 0.069 and is favored by human evaluators in 82.9% of cases compared to the strongest zero-shot VLM baseline.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13243v1" target="_blank" rel="noopener noreferrer">
                揭示并缓解多模态模型编辑中的瞬时盲态
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Uncovering and Mitigating Transient Blindness in Multimodal Model Editing
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xiaoqi Han, Ru Li, Ran Yi, Hongye Tan, Zhuomin Liang, Víctor Gutiérrez-Basulto, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注多模态模型编辑中的瞬时盲态问题，属于模型编辑技术范畴。虽然模型编辑技术可能对推荐/搜索系统中的模型更新有间接启发，但论文标题未明确表明与推荐系统、搜索或广告的直接关联，也未提及Transformer架构效率或异质数据统一建模等核心关注点。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:04:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13243v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13243v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multimodal Model Editing (MMED) aims to correct erroneous knowledge in multimodal models. Existing evaluation methods, adapted from textual model editing, overstate success by relying on low-similarity or random inputs, obscure overfitting. We propose a comprehensive locality evaluation framework, covering three key dimensions: random-image locality, no-image locality, and consistent-image locality, operationalized through seven distinct data types, enabling a detailed and structured analysis of multimodal edits. We introduce De-VQA, a dynamic evaluation for visual question answering, uncovering a phenomenon we term transient blindness, overfitting to edit-similar text while ignoring visuals. Token analysis shows edits disproportionately affect textual tokens. We propose locality-aware adversarial losses to balance cross-modal representations. Empirical results demonstrate that our approach consistently outperforms existing baselines, reducing transient blindness and improving locality by 17% on average.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13523v1" target="_blank" rel="noopener noreferrer">
                作为嘈杂文本临床报告稳健OCR替代方案的紧凑多模态语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Compact Multimodal Language Models as Robust OCR Alternatives for Noisy Textual Clinical Reports
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Nikita Neveditsin, Pawan Lingras, Salil Patil, Swarup Patil, Vijay Mago
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注医疗领域的临床报告处理，这属于明确的无关主题范畴。虽然涉及多模态语言模型技术，但其应用场景是特定的医疗文档OCR替代方案，与搜索、推荐或广告系统的核心需求没有直接关联。医疗领域的专业应用超出了当前关注的技术范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:58:03
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13523v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13523v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Digitization of medical records often relies on smartphone photographs of printed reports, producing images degraded by blur, shadows, and other noise. Conventional OCR systems, optimized for clean scans, perform poorly under such real-world conditions. This study evaluates compact multimodal language models as privacy-preserving alternatives for transcribing noisy clinical documents. Using obstetric ultrasound reports written in regionally inflected medical English common to Indian healthcare settings, we compare eight systems in terms of transcription accuracy, noise sensitivity, numeric accuracy, and computational efficiency. Compact multimodal models consistently outperform both classical and neural OCR pipelines. Despite higher computational costs, their robustness and linguistic adaptability position them as viable candidates for on-premises healthcare digitization.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13489v1" target="_blank" rel="noopener noreferrer">
                PolicyBot - 政策文档上的可靠问答系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PolicyBot - Reliable Question Answering over Policy Documents
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Gautam Nagarajan, Omir Kumar, Sudarsun Santhiappan
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于政策文档的问答系统，属于特定领域的文档理解应用。虽然问答技术是LLM的核心能力之一，但政策文档这一特定领域与推荐系统、搜索或广告的核心技术关联度较低，缺乏明确的跨领域应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:26:10
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13489v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13489v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.ET</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    All citizens of a country are affected by the laws and policies introduced by their government. These laws and policies serve essential functions for citizens. Such as granting them certain rights or imposing specific obligations. However, these documents are often lengthy, complex, and difficult to navigate, making it challenging for citizens to locate and understand relevant information. This work presents PolicyBot, a retrieval-augmented generation (RAG) system designed to answer user queries over policy documents with a focus on transparency and reproducibility. The system combines domain-specific semantic chunking, multilingual dense embeddings, multi-stage retrieval with reranking, and source-aware generation to provide responses grounded in the original documents. We implemented citation tracing to reduce hallucinations and improve user trust, and evaluated alternative retrieval and generation configurations to identify effective design choices. The end-to-end pipeline is built entirely with open-source tools, enabling easy adaptation to other domains requiring document-grounded question answering. This work highlights design considerations, practical challenges, and lessons learned in deploying trustworthy RAG systems for governance-related contexts.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12873v1" target="_blank" rel="noopener noreferrer">
                重新思考过滤气泡？为保护性过滤气泡制定研究议程
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Rethinking the filter bubble? Developing a research agenda for the protective filter bubble
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jacob Erickson
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题讨论过滤气泡概念，这主要涉及推荐系统的社会影响和用户隔离问题，属于公平性、伦理等非技术性话题范畴。虽然过滤气泡与推荐系统相关，但该研究议程更关注保护性方面，属于用户影响和社会后果分析，而非核心推荐算法或技术架构的进步。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 02:03:08
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12873v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12873v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.SI</span><span class="category-tag">cs.CY</span><span class="category-tag">cs.HC</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Filter bubbles and echo chambers have received global attention from scholars, media organizations, and the general public. Filter bubbles have primarily been regarded as intrinsically negative, and many studies have sought to minimize their influence. The detrimental influence of filter bubbles is well-studied. Filter bubbles may, for example, create information silos, amplify misinformation, and promote hatred and extremism. However, comparatively few studies have considered the other side of the filter bubble; its protective benefits, particularly to marginalized communities and those living in countries with low levels of press freedom. Through a review of the literature on digital safe spaces and protective filter bubbles, this commentary suggests that there may be a need to rethink the filter bubble, and it proposes several areas for future research.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13590v1" target="_blank" rel="noopener noreferrer">
                超越SELECT：面向真实世界文本到SQL转换的全面分类法指导基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond SELECT: A Comprehensive Taxonomy-Guided Benchmark for Real-World Text-to-SQL Translation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hao Wang, Yuanfeng Song, Xiaoming Yin, Xing Chen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到SQL转换的基准测试，属于数据库查询领域的特定任务，与推荐系统、搜索或广告的核心技术关联性较弱。虽然SQL查询在数据检索中有应用，但该工作主要关注数据库接口的自然语言处理，缺乏对推荐、搜索或广告系统的直接技术贡献或应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:52:19
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13590v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13590v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Text-to-SQL datasets are essential for training and evaluating text-to-SQL models, but existing datasets often suffer from limited coverage and fail to capture the diversity of real-world applications. To address this, we propose a novel taxonomy for text-to-SQL classification based on dimensions including core intents, statement types, syntax structures, and key actions. Using this taxonomy, we evaluate widely used public text-to-SQL datasets (e.g., Spider and Bird) and reveal limitations in their coverage and diversity. We then introduce a taxonomy-guided dataset synthesis pipeline, yielding a new dataset named SQL-Synth. This approach combines the taxonomy with Large Language Models (LLMs) to ensure the dataset reflects the breadth and complexity of real-world text-to-SQL applications. Extensive analysis and experimental results validate the effectiveness of our taxonomy, as SQL-Synth exhibits greater diversity and coverage compared to existing benchmarks. Moreover, we uncover that existing LLMs typically fall short in adequately capturing the full range of scenarios, resulting in limited performance on SQL-Synth. However, fine-tuning can substantially improve their performance in these scenarios. The proposed taxonomy has significant potential impact, as it not only enables comprehensive analysis of datasets and the performance of different LLMs, but also guides the construction of training data for LLMs.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13505v1" target="_blank" rel="noopener noreferrer">
                应用大型语言模型刻画公共叙事
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Applying Large Language Models to Characterize Public Narratives
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Elinor Poole-Dayan, Daniel T Kessler, Hannah Chiou, Margaret Hughes, Emily S Lin...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明研究重点是使用LLMs分析公共叙事，这属于LLM在社会学或政治学领域的应用，而非推荐系统、搜索或广告的核心技术。虽然涉及LLM应用，但缺乏与RecSys/Search/Ads领域的直接关联或潜在应用场景，因此相关性较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:41:55
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13505v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13505v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Public Narratives (PNs) are key tools for leadership development and civic mobilization, yet their systematic analysis remains challenging due to their subjective interpretation and the high cost of expert annotation. In this work, we propose a novel computational framework that leverages large language models (LLMs) to automate the qualitative annotation of public narratives. Using a codebook we co-developed with subject-matter experts, we evaluate LLM performance against that of expert annotators. Our work reveals that LLMs can achieve near-human-expert performance, achieving an average F1 score of 0.80 across 8 narratives and 14 codes. We then extend our analysis to empirically explore how PN framework elements manifest across a larger dataset of 22 stories. Lastly, we extrapolate our analysis to a set of political speeches, establishing a novel lens in which to analyze political rhetoric in civic spaces. This study demonstrates the potential of LLM-assisted annotation for scalable narrative analysis and highlights key limitations and directions for future research in computational civic storytelling.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13225v1" target="_blank" rel="noopener noreferrer">
                眼见非耳闻：在解读频谱图任务上对视觉语言模型进行基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Seeing isn't Hearing: Benchmarking Vision Language Models at Interpreting Spectrograms
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tyler Loakman, Joseph James, Chenghua Lin
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言模型在音频频谱图理解上的基准测试，属于纯粹的VLM评估研究。虽然涉及多模态建模，但主要关注音频-视觉模态，与推荐系统、搜索或广告中的异构数据处理没有直接关联，且缺乏明确的RecSys/Search/Ads应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:41:07
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13225v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13225v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the rise of Large Language Models (LLMs) and their vision-enabled counterparts (VLMs), numerous works have investigated their capabilities in tasks that fuse the modalities of vision and language. In this work, we benchmark the extent to which VLMs are able to act as highly-trained phoneticians, interpreting spectrograms and waveforms of speech. To do this, we synthesise a novel dataset containing 4k+ English words spoken in isolation alongside stylistically consistent spectrogram and waveform figures. We test the ability of VLMs to understand these representations of speech through a multiple-choice task whereby models must predict the correct phonemic or graphemic transcription of a spoken word when presented amongst 3 distractor transcriptions that have been selected based on their phonemic edit distance to the ground truth. We observe that both zero-shot and finetuned models rarely perform above chance, demonstrating the requirement for specific parametric knowledge of how to interpret such figures, rather than paired samples alone.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13152v1" target="_blank" rel="noopener noreferrer">
                使用大语言模型生成伪标签的零样本语法能力评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Zero-Shot Grammar Competency Estimation Using Large Language Model Generated Pseudo Labels
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sourya Dipta Das, Shubham Kumar, Kuldeep Yadav
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM在语法能力评估方面的应用，这属于纯粹的NLP评估任务，与推荐系统、搜索或广告的核心技术无关。虽然涉及LLM技术，但没有展示在RecSys/Search/Ads领域的潜在应用价值，属于纯粹的LLM评估基准研究范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:00:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13152v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13152v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Grammar competency estimation is essential for assessing linguistic proficiency in both written and spoken language; however, the spoken modality presents additional challenges due to its spontaneous, unstructured, and disfluent nature. Developing accurate grammar scoring models further requires extensive expert annotation, making large-scale data creation impractical. To address these limitations, we propose a zero-shot grammar competency estimation framework that leverages unlabeled data and Large Language Models (LLMs) without relying on manual labels. During training, we employ LLM-generated predictions on unlabeled data by using grammar competency rubric-based prompts. These predictions, treated as pseudo labels, are utilized to train a transformer-based model through a novel training framework designed to handle label noise effectively. We show that the choice of LLM for pseudo-label generation critically affects model performance and that the ratio of clean-to-noisy samples during training strongly influences stability and accuracy. Finally, a qualitative analysis of error intensity and score prediction confirms the robustness and interpretability of our approach. Experimental results demonstrate the efficacy of our approach in estimating grammar competency scores with high accuracy, paving the way for scalable, low-resource grammar assessment systems.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13126v1" target="_blank" rel="noopener noreferrer">
                孤立手语识别中循环与注意力架构的对比分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Comparative Analysis of Recurrent and Attention Architectures for Isolated Sign Language Recognition
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Nigar Alishzade, Gulchin Abdullayeva
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于手语识别这一特定领域应用，属于计算机视觉中的动作识别子领域。虽然提到了注意力机制，但这是针对手语这一特定模态的孤立识别任务，与推荐系统、搜索或广告中的用户行为序列建模、多模态内容理解等核心问题缺乏直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:28:35
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13126v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13126v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This study presents a systematic comparative analysis of recurrent and attention-based neural architectures for isolated sign language recognition. We implement and evaluate two representative models-ConvLSTM and Vanilla Transformer-on the Azerbaijani Sign Language Dataset (AzSLD) and the Word-Level American Sign Language (WLASL) dataset. Our results demonstrate that the attention-based Vanilla Transformer consistently outperforms the recurrent ConvLSTM in both Top-1 and Top-5 accuracy across datasets, achieving up to 76.8% Top-1 accuracy on AzSLD and 88.3% on WLASL. The ConvLSTM, while more computationally efficient, lags in recognition accuracy, particularly on smaller datasets. These findings highlight the complementary strengths of each paradigm: the Transformer excels in overall accuracy and signer independence, whereas the ConvLSTM offers advantages in computational efficiency and temporal modeling. The study provides a nuanced analysis of these trade-offs, offering guidance for architecture selection in sign language recognition systems depending on application requirements and resource constraints.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13118v1" target="_blank" rel="noopener noreferrer">
                像提取代码一样提取事件：用于零样本事件提取的多智能体编程框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Extracting Events Like Code: A Multi-Agent Programming Framework for Zero-Shot Event Extraction
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Quanjiang Guo, Sijie Wang, Jinchuan Zhang, Ben Zhang, Zhao Kang, Ling Tian, Ke Y...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注事件提取这一特定NLP任务，采用多智能体编程框架实现零样本能力。虽然事件提取在通用NLP中有应用，但与推荐系统、搜索或广告的核心技术（如排序、召回、用户建模）缺乏直接关联，且未明确展示在RecSys/Search/Ads领域的应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:17:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13118v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13118v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Zero-shot event extraction (ZSEE) remains a significant challenge for large language models (LLMs) due to the need for complex reasoning and domain-specific understanding. Direct prompting often yields incomplete or structurally invalid outputs--such as misclassified triggers, missing arguments, and schema violations. To address these limitations, we present Agent-Event-Coder (AEC), a novel multi-agent framework that treats event extraction like software engineering: as a structured, iterative code-generation process. AEC decomposes ZSEE into specialized subtasks--retrieval, planning, coding, and verification--each handled by a dedicated LLM agent. Event schemas are represented as executable class definitions, enabling deterministic validation and precise feedback via a verification agent. This programming-inspired approach allows for systematic disambiguation and schema enforcement through iterative refinement. By leveraging collaborative agent workflows, AEC enables LLMs to produce precise, complete, and schema-consistent extractions in zero-shot settings. Experiments across five diverse domains and six LLMs demonstrate that AEC consistently outperforms prior zero-shot baselines, showcasing the power of treating event extraction like code generation. The code and data are released on https://github.com/UESTC-GQJ/Agent-Event-Coder.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13091v1" target="_blank" rel="noopener noreferrer">
                STEP：成功率感知的轨迹高效策略优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            STEP: Success-Rate-Aware Trajectory-Efficient Policy Optimization
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuhan Chen, Yuxuan Liu, Long Zhang, Pengzhi Gao, Jian Luan, Wei Liu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明其专注于强化学习中的策略优化方法，强调轨迹效率和成功率。虽然强化学习在推荐系统和广告中有潜在应用，但标题没有明确表明与RecSys/Search/Ads领域的直接关联，也没有提到任何与LLM、Transformer或异构数据建模相关的技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:43:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13091v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13091v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multi-turn interaction remains challenging for online reinforcement learning. A common solution is trajectory-level optimization, which treats each trajectory as a single training sample. However, this approach can be inefficient and yield misleading learning signals: it applies uniform sampling across tasks regardless of difficulty, penalizes correct intermediate actions in failed trajectories, and incurs high sample-collection costs. To address these issues, we propose STEP (Success-rate-aware Trajectory-Efficient Policy optimization), a framework that dynamically allocates sampling based on per-task success rates and performs step-level optimization. STEP maintains a smoothed success-rate record to guide adaptive trajectory resampling, allocating more effort to harder tasks. It then computes success-rate-weighted advantages and decomposes trajectories into step-level samples. Finally, it applies a step-level GRPO augmentation to refine updates for low-success tasks. Experiments on OSWorld and AndroidWorld show that STEP substantially improves sample efficiency and training stability over trajectory-level GRPO, converging faster and generalizing better under the same sampling budget.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13040v1" target="_blank" rel="noopener noreferrer">
                BLI作为对齐度量有多好：词嵌入范式下的一项研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            How Good is BLI as an Alignment Measure: A Study in Word Embedding Paradigm
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kasun Wickramasinghe, Nisansa de Silva
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究词嵌入空间中的跨语言对齐评估，属于传统NLP中的词向量技术。虽然词嵌入是LLM的基础组件之一，但该研究聚焦于跨语言对齐的评估方法，与推荐系统、搜索或广告的核心技术关联度较低，且未涉及Transformer架构改进或直接应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 06:41:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13040v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13040v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Sans a dwindling number of monolingual embedding studies originating predominantly from the low-resource domains, it is evident that multilingual embedding has become the de facto choice due to its adaptability to the usage of code-mixed languages, granting the ability to process multilingual documents in a language-agnostic manner, as well as removing the difficult task of aligning monolingual embeddings. But is this victory complete? Are the multilingual models better than aligned monolingual models in every aspect? Can the higher computational cost of multilingual models always be justified? Or is there a compromise between the two extremes? Bilingual Lexicon Induction is one of the most widely used metrics in terms of evaluating the degree of alignment between two embedding spaces. In this study, we explore the strengths and limitations of BLI as a measure to evaluate the degree of alignment of two embedding spaces. Further, we evaluate how well traditional embedding alignment techniques, novel multilingual models, and combined alignment techniques perform BLI tasks in the contexts of both high-resource and low-resource languages. In addition to that, we investigate the impact of the language families to which the pairs of languages belong. We identify that BLI does not measure the true degree of alignment in some cases and we propose solutions for them. We propose a novel stem-based BLI approach to evaluate two aligned embedding spaces that take into account the inflected nature of languages as opposed to the prevalent word-based BLI techniques. Further, we introduce a vocabulary pruning technique that is more informative in showing the degree of the alignment, especially performing BLI on multilingual embedding models. Often, combined embedding alignment techniques perform better while in certain cases multilingual embeddings perform better (mainly low-resource language cases).
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13029v1" target="_blank" rel="noopener noreferrer">
                AA-全知：评估大型语言模型中的跨领域知识可靠性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AA-Omniscience: Evaluating Cross-Domain Knowledge Reliability in Large Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Declan Jackson, William Keating, George Cameron, Micah Hill-Smith
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的跨领域知识可靠性评估，这属于纯粹的LLM评估基准研究，与您的关注点无关。虽然涉及LLM技术，但论文聚焦于知识可靠性的基准测试，没有展示在推荐系统、搜索或广告中的潜在应用价值，因此相关性很低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 06:27:16
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13029v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13029v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Existing language model evaluations primarily measure general capabilities, yet reliable use of these models across a range of domains demands factual accuracy and recognition of knowledge gaps. We introduce AA-Omniscience, a benchmark designed to measure both factual recall and knowledge calibration across 6,000 questions. Questions are derived from authoritative academic and industry sources, and cover 42 economically relevant topics within six different domains. The evaluation measures a model's Omniscience Index, a bounded metric (-100 to 100) measuring factual recall that jointly penalizes hallucinations and rewards abstention when uncertain, with 0 equating to a model that answers questions correctly as much as it does incorrectly. Among evaluated models, Claude 4.1 Opus attains the highest score (4.8), making it one of only three models to score above zero. These results reveal persistent factuality and calibration weaknesses across frontier models. Performance also varies by domain, with the models from three different research labs leading across the six domains. This performance variability suggests models should be chosen according to the demands of the use case rather than general performance for tasks where knowledge is important.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13021v1" target="_blank" rel="noopener noreferrer">
                PragWorld：一个在最小语言修改和对话动态下评估LLM局部世界模型的基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PragWorld: A Benchmark Evaluating LLMs' Local World Model under Minimal Linguistic Alterations and Conversational Dynamics
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sachin Vashistha, Aryan Bibhuti, Atharva Naik, Martin Tutek, Somak Aditya
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的世界模型评估基准，属于纯粹的NLP评估和基准测试范畴。虽然提到了对话动态，但核心是评估LLM的内部表示能力，而非直接应用于推荐系统、搜索或广告的实际任务。这种基准测试研究缺乏明确的RecSys/Search/Ads应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 06:17:17
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13021v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13021v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Real-world conversations are rich with pragmatic elements, such as entity mentions, references, and implicatures. Understanding such nuances is a requirement for successful natural communication, and often requires building a local world model which encodes such elements and captures the dynamics of their evolving states. However, it is not well-understood whether language models (LMs) construct or maintain a robust implicit representation of conversations. In this work, we evaluate the ability of LMs to encode and update their internal world model in dyadic conversations and test their malleability under linguistic alterations. To facilitate this, we apply seven minimal linguistic alterations to conversations sourced from popular datasets and construct two benchmarks comprising yes-no questions. We evaluate a wide range of open and closed source LMs and observe that they struggle to maintain robust accuracy. Our analysis unveils that LMs struggle to memorize crucial details, such as tracking entities under linguistic alterations to conversations. We then propose a dual-perspective interpretability framework which identifies transformer layers that are useful or harmful and highlights linguistic alterations most influenced by harmful layers, typically due to encoding spurious signals or relying on shortcuts. Inspired by these insights, we propose two layer-regularization based fine-tuning strategies that suppress the effect of the harmful layers.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12991v1" target="_blank" rel="noopener noreferrer">
                微调后的LLM知道它们不知道：一种参数高效的方法来恢复诚实性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Fine-Tuned LLMs Know They Don't Know: A Parameter-Efficient Approach to Recovering Honesty
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zeyu Shi, Ziming Wang, Tianyu Chen, Shiqi Gao, Haoyi Zhou, Qingyun Sun, Jianxin ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的诚实性和不确定性校准，这属于LLM评估和可靠性领域。虽然诚实性在推荐和搜索系统中可能有间接价值，但该研究更偏向纯粹的NLP可靠性问题，而非直接应用于推荐系统、搜索或广告的核心技术。论文没有明确展示在推荐/搜索/广告中的具体应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 05:30:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12991v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12991v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The honesty of Large Language Models (LLMs) is increasingly important for safe deployment in high-stakes domains. However, this crucial trait is severely undermined by supervised fine-tuning (SFT), a common technique for model specialization. Existing recovery methods rely on data-intensive global parameter adjustments, implicitly assuming that SFT deeply corrupts the models' ability to recognize their knowledge boundaries. However, we observe that fine-tuned LLMs still preserve this ability; what is damaged is their capacity to faithfully express that awareness. Building on this, we propose Honesty-Critical Neurons Restoration (HCNR) to surgically repair this suppressed capacity. HCNR identifies and restores key expression-governing neurons to their pre-trained state while harmonizing them with task-oriented neurons via Hessian-guided compensation. Experiments on four QA tasks and five LLM families demonstrate that HCNR effectively recovers 33.25% of the compromised honesty while achieving at least 2.23x speedup with over 10x less data compared to baseline methods, offering a practical solution for trustworthy LLM deployment.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12928v1" target="_blank" rel="noopener noreferrer">
                视觉房间2.0：对于多模态大语言模型而言，看见不等于理解
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Visual Room 2.0: Seeing is Not Understanding for MLLMs
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haokun Li, Yazhou Zhang, Jizhi Ding, Qiuchi Li, Peng Zhang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态大语言模型在视觉理解方面的局限性，属于纯视觉和多模态理解领域。虽然标题提到MLLMs，但核心焦点是视觉理解能力评估，与推荐系统、搜索或广告中的异构数据处理没有直接关联。论文没有显示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 03:34:52
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12928v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12928v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Can multi-modal large language models (MLLMs) truly understand what they can see? Extending Searle's Chinese Room into the multi-modal domain, this paper proposes the Visual Room argument: MLLMs may describe every visual detail precisely yet fail to comprehend the underlying emotions and intentions, namely seeing is not understanding. Building on this, we introduce \textit{Visual Room} 2.0, a hierarchical benchmark for evaluating perception-cognition alignment of MLLMs. We model human perceptive and cognitive processes across three levels: low, middle, and high, covering 17 representative tasks. The perception component ranges from attribute recognition to scene understanding, while the cognition component extends from textual entailment to causal and social reasoning. The dataset contains 350 multi-modal samples, each with six progressive questions (2,100 in total) spanning perception to cognition. Evaluating 10 state-of-the-art (SoTA) MLLMs, we highlight three key findings: (1) MLLMs exhibit stronger perceptual competence than cognitive ability (8.0\%$\uparrow$); (2) cognition appears not causally dependent on perception-based reasoning; and (3) cognition scales with model size, but perception does not consistently improve with larger variants. This work operationalizes Seeing $\ne$ Understanding as a testable hypothesis, offering a new paradigm from perceptual processing to cognitive reasoning in MLLMs. Our dataset is available at https://huggingface.co/datasets/LHK2003/PCBench.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12850v1" target="_blank" rel="noopener noreferrer">
                量化潜在狄利克雷分配的一致性和准确性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Quantifying consistency and accuracy of Latent Dirichlet Allocation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Saranzaya Magsarjav, Melissa Humphries, Jonathan Tuke, Lewis Mitchell
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于LDA（潜在狄利克雷分配）模型的评估方法，属于传统主题建模技术。虽然主题建模在搜索和推荐中有历史应用，但本文聚焦于模型评估而非核心架构创新，与现代LLM/Transformer技术或直接应用关联较弱。其潜在应用仅限于传统内容理解任务，与当前焦点中的前沿技术趋势相关性较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 00:44:27
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12850v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12850v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Topic modelling in Natural Language Processing uncovers hidden topics in large, unlabelled text datasets. It is widely applied in fields such as information retrieval, content summarisation, and trend analysis across various disciplines. However, probabilistic topic models can produce different results when rerun due to their stochastic nature, leading to inconsistencies in latent topics. Factors like corpus shuffling, rare text removal, and document elimination contribute to these variations. This instability affects replicability, reliability, and interpretation, raising concerns about whether topic models capture meaningful topics or just noise. To address these problems, we defined a new stability measure that incorporates accuracy and consistency and uses the generative properties of LDA to generate a new corpus with ground truth. These generated corpora are run through LDA 50 times to determine the variability in the output. We show that LDA can correctly determine the underlying number of topics in the documents. We also find that LDA is more internally consistent, as the multiple reruns return similar topics; however, these topics are not the true topics.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13715v1" target="_blank" rel="noopener noreferrer">
                跨镜头分割任意对象：一种方法及基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Segment Anything Across Shots: A Method and Benchmark
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hengrui Hu, Kaining Ying, Henghui Ding
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉中的通用分割任务，特别是跨镜头分割，这与推荐系统、搜索或广告的核心技术关联较弱。虽然视觉模型在某些推荐场景中可能作为辅助特征，但该工作缺乏明确的RecSys/Search/Ads应用潜力，且未涉及LLM、Transformer架构或异构数据统一建模等关键技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:58:40
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13715v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13715v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This work focuses on multi-shot semi-supervised video object segmentation (MVOS), which aims at segmenting the target object indicated by an initial mask throughout a video with multiple shots. The existing VOS methods mainly focus on single-shot videos and struggle with shot discontinuities, thereby limiting their real-world applicability. We propose a transition mimicking data augmentation strategy (TMA) which enables cross-shot generalization with single-shot data to alleviate the severe annotated multi-shot data sparsity, and the Segment Anything Across Shots (SAAS) model, which can detect and comprehend shot transitions effectively. To support evaluation and future study in MVOS, we introduce Cut-VOS, a new MVOS benchmark with dense mask annotations, diverse object categories, and high-frequency transitions. Extensive experiments on YouMVOS and Cut-VOS demonstrate that the proposed SAAS achieves state-of-the-art performance by effectively mimicking, understanding, and segmenting across complex transitions. The code and datasets are released at https://henghuiding.com/SAAS/.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13714v1" target="_blank" rel="noopener noreferrer">
                UnSAMv2：自监督学习实现任意粒度下的任意分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            UnSAMv2: Self-Supervised Learning Enables Segment Anything at Any Granularity
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junwei Yu, Trevor Darrell, XuDong Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的图像分割技术，属于纯粹的视觉研究方向。虽然自监督学习是重要的技术方向，但论文标题明确指向视觉分割任务，与推荐系统、搜索或广告的核心技术栈没有直接关联，也没有明显的跨模态应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:58:34
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13714v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13714v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The Segment Anything Model (SAM) family has become a widely adopted vision foundation model, but its ability to control segmentation granularity remains limited. Users often need to refine results manually - by adding more prompts or selecting from pre-generated masks - to achieve the desired level of detail. This process can be ambiguous, as the same prompt may correspond to several plausible masks, and collecting dense annotations across all granularities is prohibitively expensive, making supervised solutions infeasible. To address this limitation, we introduce UnSAMv2, which enables segment anything at any granularity without human annotations. UnSAMv2 extends the divide-and-conquer strategy of UnSAM by discovering abundant mask-granularity pairs and introducing a novel granularity control embedding that enables precise, continuous control over segmentation scale. Remarkably, with only $6$K unlabeled images and $0.02\%$ additional parameters, UnSAMv2 substantially enhances SAM-2, achieving segment anything at any granularity across interactive, whole-image, and video segmentation tasks. Evaluated on over $11$ benchmarks, UnSAMv2 improves $\text{NoC}_{90}$ (5.69 $\rightarrow$ 4.75), 1-IoU (58.0 $\rightarrow$ 73.1), and $\text{AR}_{1000}$ (49.6 $\rightarrow$ 68.3), showing that small amounts of unlabeled data with a granularity-aware self-supervised learning method can unlock the potential of vision foundation models.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13647v1" target="_blank" rel="noopener noreferrer">
                Part-X-MLLM：部件感知的三维多模态大语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Part-X-MLLM: Part-aware 3D Multimodal Large Language Model
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chunshi Wang, Junliang Ye, Yunhan Yang, Yang Li, Zizhuo Lin, Jun Zhu, Zhuo Chen,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D多模态大语言模型，属于计算机视觉和3D理解领域，与推荐系统、搜索或广告的核心技术关联度较低。虽然多模态建模概念可能启发异构数据处理，但3D部件感知的具体应用场景在RecSys/Search/Ads中缺乏明确的实用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:59:52
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13647v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13647v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce Part-X-MLLM, a native 3D multimodal large language model that unifies diverse 3D tasks by formulating them as programs in a structured, executable grammar. Given an RGB point cloud and a natural language prompt, our model autoregressively generates a single, coherent token sequence encoding part-level bounding boxes, semantic descriptions, and edit commands. This structured output serves as a versatile interface to drive downstream geometry-aware modules for part-based generation and editing. By decoupling the symbolic planning from the geometric synthesis, our approach allows any compatible geometry engine to be controlled through a single, language-native frontend. We pre-train a dual-encoder architecture to disentangle structure from semantics and instruction-tune the model on a large-scale, part-centric dataset. Experiments demonstrate that our model excels at producing high-quality, structured plans, enabling state-of-the-art performance in grounded Q\&A, compositional generation, and localized editing through one unified interface. Project page: https://chunshi.wang/Part-X-MLLM/
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13644v1" target="_blank" rel="noopener noreferrer">
                CacheFlow：用于高效长视频理解的压缩流式内存
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CacheFlow: Compressive Streaming Memory for Efficient Long-Form Video Understanding
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shrenik Patel, Daivik Patel
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频理解领域的效率优化技术，虽然提到了内存压缩和流式处理等效率改进，但这些技术主要针对视频模态，与推荐系统、搜索或广告的核心技术栈关联较弱。压缩内存技术可能对处理长序列数据有一定启发，但缺乏明确的RecSys/Search/Ads应用场景的直接连接。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:56:14
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13644v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13644v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Long-form video question answering (VQA) overwhelms current vision-language models (VLMs) because attention and key-value (KV) caches grow with runtime, forcing either expensive inference or near-sighted sliding windows. We introduce CacheFlow, a training-free pipeline that pairs Dynamic Token Dropping (DTD) with a compressive long-term memory. DTD prunes per-patch tokens online via cosine similarity to the previous frame, and surviving tokens are packed into fixed-size blocks. This online, per-frame processing makes our approach fundamentally suited for live streaming VQA. As blocks are processed, each one's keys are summarized by a tiny recurrent encoder to form a retrieval index, while the block's full KV pairs are offloaded and later rehydrated for generation, preserving answer fidelity. At inference, a consensus-based retrieval mechanism retrieves only the Top-K most relevant blocks and attends over both the retrieved and local context for precise, long-range reasoning. CacheFlow is drop-in, architecture-agnostic, and requires no fine-tuning. Experiments on both offline and streaming VQA benchmarks demonstrate that CacheFlow outperforms current strong baselines, while processing up to 87% less tokens. Our dual approach enables VLMs to be both efficient and context-aware, paving the way for practical long-form video understanding.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13587v1" target="_blank" rel="noopener noreferrer">
                VVS：通过部分验证跳过加速视觉自回归生成的推测解码
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            VVS: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haotian Dong, Ye Li, Rongwei Lu, Chen Tang, Shu-Tao Xia, Zhi Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉自回归生成的推测解码加速技术，属于计算机视觉领域的效率优化。虽然推测解码技术本身具有通用性，但论文明确聚焦于视觉生成任务，与推荐系统、搜索或广告的核心技术栈关联度较低。在推荐/搜索/广告场景中，文本生成而非视觉生成是主要应用方向，因此潜在应用价值有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:50:58
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13587v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13587v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Visual autoregressive (AR) generation models have demonstrated strong potential for image generation, yet their next-token-prediction paradigm introduces considerable inference latency. Although speculative decoding (SD) has been proven effective for accelerating visual AR models, its "draft one step, then verify one step" paradigm prevents a direct reduction of the forward passes, thus restricting acceleration potential. Motivated by the visual token interchangeability, we for the first time to explore verification skipping in the SD process of visual AR model generation to explicitly cut the number of target model forward passes, thereby reducing inference latency. Based on an analysis of the drafting stage's characteristics, we observe that verification redundancy and stale feature reusability are key factors to retain generation quality and speedup for verification-free steps. Inspired by these two observations, we propose a novel SD framework VVS to accelerate visual AR generation via partial verification skipping, which integrates three complementary modules: (1) a verification-free token selector with dynamical truncation, (2) token-level feature caching and reuse, and (3) fine-grained skipped step scheduling. Consequently, VVS reduces the number of target model forward passes by a factor of $2.8\times$ relative to vanilla AR decoding while maintaining competitive generation quality, offering a superior speed-quality trade-off over conventional SD frameworks and revealing strong potential to reshape the SD paradigm.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13575v1" target="_blank" rel="noopener noreferrer">
                基于图像和文本的人员重识别的分层提示学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Hierarchical Prompt Learning for Image- and Text-Based Person Re-Identification
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Linhan Zhou, Shuang Li, Neng Dong, Yonghang Tai, Yafei Zhang, Huafeng Li
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的人员重识别任务，虽然涉及多模态学习（图像和文本），但其核心应用场景是安防监控而非推荐系统、搜索或广告。论文提出的分层提示学习方法在技术思路上可能与VLM处理异构数据有相似之处，但这种关联性过于间接，缺乏明确的RecSys/Search/Ads应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:39:49
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13575v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13575v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Person re-identification (ReID) aims to retrieve target pedestrian images given either visual queries (image-to-image, I2I) or textual descriptions (text-to-image, T2I). Although both tasks share a common retrieval objective, they pose distinct challenges: I2I emphasizes discriminative identity learning, while T2I requires accurate cross-modal semantic alignment. Existing methods often treat these tasks separately, which may lead to representation entanglement and suboptimal performance. To address this, we propose a unified framework named Hierarchical Prompt Learning (HPL), which leverages task-aware prompt modeling to jointly optimize both tasks. Specifically, we first introduce a Task-Routed Transformer, which incorporates dual classification tokens into a shared visual encoder to route features for I2I and T2I branches respectively. On top of this, we develop a hierarchical prompt generation scheme that integrates identity-level learnable tokens with instance-level pseudo-text tokens. These pseudo-tokens are derived from image or text features via modality-specific inversion networks, injecting fine-grained, instance-specific semantics into the prompts. Furthermore, we propose a Cross-Modal Prompt Regularization strategy to enforce semantic alignment in the prompt token space, ensuring that pseudo-prompts preserve source-modality characteristics while enhancing cross-modal transferability. Extensive experiments on multiple ReID benchmarks validate the effectiveness of our method, achieving state-of-the-art performance on both I2I and T2I tasks.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13545v1" target="_blank" rel="noopener noreferrer">
                多模态对比学习的鲁棒防御策略：针对后门攻击的高效微调
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Robust Defense Strategies for Multimodal Contrastive Learning: Efficient Fine-tuning Against Backdoor Attacks
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Md. Iqbal Hossain, Afia Sajeeda, Neeresh Kumar Perla, Ming Shao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态对比学习中的安全防御问题，属于安全/隐私保护领域，这在Irrelevant Topics中明确排除。虽然涉及多模态学习，但核心焦点是防御后门攻击而非技术架构创新，对RecSys/Search/Ads领域的直接应用价值有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:16:50
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13545v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13545v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The advent of multimodal deep learning models, such as CLIP, has unlocked new frontiers in a wide range of applications, from image-text understanding to classification tasks. However, these models are not safe for adversarial attacks, particularly backdoor attacks, which can subtly manipulate model behavior. Moreover, existing defense methods typically involve training from scratch or fine-tuning using a large dataset without pinpointing the specific labels that are affected. In this study, we introduce an innovative strategy to enhance the robustness of multimodal contrastive learning models against such attacks. In particular, given a poisoned CLIP model, our approach can identify the backdoor trigger and pinpoint the victim samples and labels in an efficient manner. To that end, an image segmentation ``oracle'' is introduced as the supervisor for the output of the poisoned CLIP. We develop two algorithms to rectify the poisoned model: (1) differentiating between CLIP and Oracle's knowledge to identify potential triggers; (2) pinpointing affected labels and victim samples, and curating a compact fine-tuning dataset. With this knowledge, we are allowed to rectify the poisoned CLIP model to negate backdoor effects. Extensive experiments on visual recognition benchmarks demonstrate our strategy is effective in CLIP-based backdoor defense.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13533v1" target="_blank" rel="noopener noreferrer">
                极小极大多目标保形预测及其在成像逆问题中的应用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Minimax Multi-Target Conformal Prediction with Applications to Imaging Inverse Problems
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jeffrey Wen, Rizwan Ahmad, Philip Schniter
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注保形预测和成像逆问题，属于不确定性量化和计算机视觉领域。虽然保形预测在理论上可以应用于推荐系统的置信度校准，但论文明确聚焦于成像应用，与推荐、搜索或广告系统的核心关注点关联较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:06:37
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13533v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13533v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In ill-posed imaging inverse problems, uncertainty quantification remains a fundamental challenge, especially in safety-critical applications. Recently, conformal prediction has been used to quantify the uncertainty that the inverse problem contributes to downstream tasks like image classification, image quality assessment, fat mass quantification, etc. While existing works handle only a scalar estimation target, practical applications often involve multiple targets. In response, we propose an asymptotically minimax approach to multi-target conformal prediction that provides tight prediction intervals while ensuring joint marginal coverage. We then outline how our minimax approach can be applied to multi-metric blind image quality assessment, multi-task uncertainty quantification, and multi-round measurement acquisition. Finally, we numerically demonstrate the benefits of our minimax method, relative to existing multi-target conformal prediction methods, using both synthetic and magnetic resonance imaging (MRI) data.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13494v1" target="_blank" rel="noopener noreferrer">
                基于语言引导的视觉-语言模型不变性探测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Language-Guided Invariance Probing of Vision-Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jae Joong Lee
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉-语言模型的不变性探测，属于纯粹的VLM分析研究，没有明确展示在推荐系统、搜索或广告领域的应用潜力。虽然标题涉及多模态模型，但缺乏将这种不变性探测技术转化为RecSys/Search/Ads实际应用的明确路径。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:35:49
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13494v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13494v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent vision-language models (VLMs) such as CLIP, OpenCLIP, EVA02-CLIP and SigLIP achieve strong zero-shot performance, but it is unclear how reliably they respond to controlled linguistic perturbations. We introduce Language-Guided Invariance Probing (LGIP), a benchmark that measures (i) invariance to meaning-preserving paraphrases and (ii) sensitivity to meaning-changing semantic flips in image-text matching. Using 40k MS COCO images with five human captions each, we automatically generate paraphrases and rule-based flips that alter object category, color or count, and summarize model behavior with an invariance error, a semantic sensitivity gap and a positive-rate statistic. Across nine VLMs, EVA02-CLIP and large OpenCLIP variants lie on a favorable invariance-sensitivity frontier, combining low paraphrase-induced variance with consistently higher scores for original captions than for their flipped counterparts. In contrast, SigLIP and SigLIP2 show much larger invariance error and often prefer flipped captions to the human descriptions, especially for object and color edits. These failures are largely invisible to standard retrieval metrics, indicating that LGIP provides a model-agnostic diagnostic for the linguistic robustness of VLMs beyond conventional accuracy scores.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13488v1" target="_blank" rel="noopener noreferrer">
                InterMoE：通过动态时序选择性混合专家模型生成个体特定的3D人体交互
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            InterMoE: Individual-Specific 3D Human Interaction Generation via Dynamic Temporal-Selective MoE
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lipeng Wang, Hongxing Fan, Haohua Chen, Zehuan Huang, Lu Sheng
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然该论文涉及MoE架构（混合专家模型），但其核心应用领域是3D人体交互生成，这属于计算机图形学和3D视觉范畴。该技术主要面向动画、虚拟现实等应用，与推荐系统、搜索或广告领域缺乏明确的关联性和应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:26:10
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13488v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13488v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Generating high-quality human interactions holds significant value for applications like virtual reality and robotics. However, existing methods often fail to preserve unique individual characteristics or fully adhere to textual descriptions. To address these challenges, we introduce InterMoE, a novel framework built on a Dynamic Temporal-Selective Mixture of Experts. The core of InterMoE is a routing mechanism that synergistically uses both high-level text semantics and low-level motion context to dispatch temporal motion features to specialized experts. This allows experts to dynamically determine the selection capacity and focus on critical temporal features, thereby preserving specific individual characteristic identities while ensuring high semantic fidelity. Extensive experiments show that InterMoE achieves state-of-the-art performance in individual-specific high-fidelity 3D human interaction generation, reducing FID scores by 9% on the InterHuman dataset and 22% on InterX.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13442v1" target="_blank" rel="noopener noreferrer">
                解锁基础多模态大语言模型的伪造检测潜力：一种新型免训练流程
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Unlocking the Forgery Detection Potential of Vanilla MLLMs: A Novel Training-Free Pipeline
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Rui Zuo, Qinyue Tong, Zhe-Ming Lu, Ziqian Lu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于多模态伪造检测，属于计算机视觉安全应用领域，与推荐系统、搜索或广告的核心技术需求无关。虽然涉及多模态大语言模型，但其应用方向（伪造检测）属于安全/取证范畴，属于明确的无关主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:49:57
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13442v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13442v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the rapid advancement of artificial intelligence-generated content (AIGC) technologies, including multimodal large language models (MLLMs) and diffusion models, image generation and manipulation have become remarkably effortless. Existing image forgery detection and localization (IFDL) methods often struggle to generalize across diverse datasets and offer limited interpretability. Nowadays, MLLMs demonstrate strong generalization potential across diverse vision-language tasks, and some studies introduce this capability to IFDL via large-scale training. However, such approaches cost considerable computational resources, while failing to reveal the inherent generalization potential of vanilla MLLMs to address this problem. Inspired by this observation, we propose Foresee, a training-free MLLM-based pipeline tailored for image forgery analysis. It eliminates the need for additional training and enables a lightweight inference process, while surpassing existing MLLM-based methods in both tamper localization accuracy and the richness of textual explanations. Foresee employs a type-prior-driven strategy and utilizes a Flexible Feature Detector (FFD) module to specifically handle copy-move manipulations, thereby effectively unleashing the potential of vanilla MLLMs in the forensic domain. Extensive experiments demonstrate that our approach simultaneously achieves superior localization accuracy and provides more comprehensive textual explanations. Moreover, Foresee exhibits stronger generalization capability, outperforming existing IFDL methods across various tampering types, including copy-move, splicing, removal, local enhancement, deepfake, and AIGC-based editing. The code will be released in the final version.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13400v1" target="_blank" rel="noopener noreferrer">
                这是什么颜色？一个文本干扰多模态幻觉基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            What Color Is It? A Text-Interference Multimodal Hallucination Benchmark
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jinkun Zhao, Lei Huang, Wenjun Wu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确涉及多模态幻觉基准测试，这属于纯粹的评估基准范畴，与我的关注点无关。虽然提到了多模态，但焦点是幻觉检测和基准测试，而非将异构数据作为不同模态进行统一建模的创新方法。该工作没有展示在推荐系统、搜索或广告中的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:15:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13400v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13400v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the rapid advancement of Large Models, numerous text-and-vision-fused Multimodal Large Models (MLMs) have emerged. However, these MLMs remain susceptible to informational interference in visual perception, particularly in color perception, which introduces an additional risk of hallucination. To validate this hypothesis, we introduce the "What Color Is It" dataset, a novel benchmark constructed using a simple method to trigger single-modality visual hallucination in MLMs. Based on this dataset, we further investigate the underlying causes of hallucination in the visual modality of MLMs and propose potential solutions to enhance their robustness.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13387v1" target="_blank" rel="noopener noreferrer">
                广义去噪扩散码本模型（gDDCM）：使用预训练扩散模型对图像进行标记化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Generalized Denoising Diffusion Codebook Models (gDDCM): Tokenizing images using a pre-trained diffusion model
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Fei Kong
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注图像标记化和扩散模型，属于计算机视觉领域。虽然扩散模型是生成模型的一种，但该工作主要针对图像处理，没有明确展示在推荐系统、搜索或广告中的潜在应用。标题中缺乏与异构数据建模、序列处理或排名任务相关的元素。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:58:49
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13387v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13387v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recently, the Denoising Diffusion Codebook Models (DDCM) was proposed. DDCM leverages the Denoising Diffusion Probabilistic Model (DDPM) and replaces the random noise in the backward process with noise sampled from specific sets according to a predefined rule, thereby enabling image compression. However, DDCM cannot be applied to methods other than DDPM. In this paper, we propose the generalized Denoising Diffusion Compression Model (gDDCM), which extends DDCM to mainstream diffusion models and their variants, including DDPM, Score-Based Models, Consistency Models, and Rectified Flow. We evaluate our method on CIFAR-10 and LSUN Bedroom datasets. Experimental results demonstrate that our approach successfully generalizes DDCM to the aforementioned models and achieves improved performance.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13315v1" target="_blank" rel="noopener noreferrer">
                基于计算机视觉的群体活动检测与动作定位
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Computer Vision based group activity detection and action spotting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Narthana Sivalingam, Santhirarajah Sivasthigan, Thamayanthi Mahendranathan, G. M...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于纯粹的计算机视觉任务（群体活动检测和动作定位），没有明确展示与推荐系统、搜索或广告的关联。虽然视觉技术在某些场景下可能作为辅助特征，但论文标题本身并未表明任何在推荐、搜索或广告领域的潜在应用，因此相关性较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:52:22
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13315v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13315v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Group activity detection in multi-person scenes is challenging due to complex human interactions, occlusions, and variations in appearance over time. This work presents a computer vision based framework for group activity recognition and action spotting using a combination of deep learning models and graph based relational reasoning. The system first applies Mask R-CNN to obtain accurate actor localization through bounding boxes and instance masks. Multiple backbone networks, including Inception V3, MobileNet, and VGG16, are used to extract feature maps, and RoIAlign is applied to preserve spatial alignment when generating actor specific features. The mask information is then fused with the feature maps to obtain refined masked feature representations for each actor. To model interactions between individuals, we construct Actor Relation Graphs that encode appearance similarity and positional relations using methods such as normalized cross correlation, sum of absolute differences, and dot product. Graph Convolutional Networks operate on these graphs to reason about relationships and predict both individual actions and group level activities. Experiments on the Collective Activity dataset demonstrate that the combination of mask based feature refinement, robust similarity search, and graph neural network reasoning leads to improved recognition performance across both crowded and non crowded scenarios. This approach highlights the potential of integrating segmentation, feature extraction, and relational graph reasoning for complex video understanding tasks.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13276v1" target="_blank" rel="noopener noreferrer">
                基于弱监督双编码器模型的监控视频异常事件识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Recognition of Abnormal Events in Surveillance Videos using Weakly Supervised Dual-Encoder Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Noam Tsfaty, Avishai Weizman, Liav Cohen, Moshe Tshuva, Yehudit Aperstein
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的异常事件检测，属于纯粹的视觉任务，与推荐系统、搜索或广告的核心技术没有直接关联。虽然双编码器架构在概念上类似于多模态建模，但该论文专注于监控视频分析，没有展示在异构数据统一建模或推荐系统应用方面的潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:47:28
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13276v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13276v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We address the challenge of detecting rare and diverse anomalies in surveillance videos using only video-level supervision. Our dual-backbone framework combines convolutional and transformer representations through top-k pooling, achieving 90.7% area under the curve (AUC) on the UCF-Crime dataset.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13269v1" target="_blank" rel="noopener noreferrer">
                您的视觉语言模型是否具备天空就绪能力？面向无人机导航的综合空间智能基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Is your VLM Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for UAV Navigation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lingfeng Zhang, Yuchen Zhang, Hongsheng Li, Haoxiang Fu, Yingbo Tang, Hangjun Ye...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注无人机导航的视觉语言模型基准测试，属于计算机视觉和机器人领域的特定应用。虽然涉及视觉语言模型技术，但其应用场景（无人机导航）与推荐系统、搜索或广告领域没有直接关联，且缺乏将这些技术迁移到异构数据处理或推荐场景的明确潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:39:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13269v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13269v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Vision-Language Models (VLMs), leveraging their powerful visual perception and reasoning capabilities, have been widely applied in Unmanned Aerial Vehicle (UAV) tasks. However, the spatial intelligence capabilities of existing VLMs in UAV scenarios remain largely unexplored, raising concerns about their effectiveness in navigating and interpreting dynamic environments. To bridge this gap, we introduce SpatialSky-Bench, a comprehensive benchmark specifically designed to evaluate the spatial intelligence capabilities of VLMs in UAV navigation. Our benchmark comprises two categories-Environmental Perception and Scene Understanding-divided into 13 subcategories, including bounding boxes, color, distance, height, and landing safety analysis, among others. Extensive evaluations of various mainstream open-source and closed-source VLMs reveal unsatisfactory performance in complex UAV navigation scenarios, highlighting significant gaps in their spatial capabilities. To address this challenge, we developed the SpatialSky-Dataset, a comprehensive dataset containing 1M samples with diverse annotations across various scenarios. Leveraging this dataset, we introduce Sky-VLM, a specialized VLM designed for UAV spatial reasoning across multiple granularities and contexts. Extensive experimental results demonstrate that Sky-VLM achieves state-of-the-art performance across all benchmark tasks, paving the way for the development of VLMs suitable for UAV scenarios. The source code is available at https://github.com/linglingxiansen/SpatialSKy.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13261v1" target="_blank" rel="noopener noreferrer">
                构建以自我为中心的程序化AI助手：方法、基准与挑战
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Building Egocentric Procedural AI Assistant: Methods, Benchmarks, and Challenges
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junlong Li, Huaiyuan Xu, Sijie Cheng, Kejun Wu, Kim-Hui Yap, Lap-Pui Chau, Yi Wa...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注以自我为中心的AI助手构建，涉及程序化任务执行和基准测试，这与推荐系统、搜索或广告的核心技术领域关联度较低。虽然AI助手技术可能有间接应用，但论文焦点更偏向通用AI助手而非特定于RecSys/Search/Ads的排名、检索或个性化技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:21:42
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13261v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13261v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Driven by recent advances in vision language models (VLMs) and egocentric perception research, we introduce the concept of an egocentric procedural AI assistant (EgoProceAssist) tailored to step-by-step support daily procedural tasks in a first-person view. In this work, we start by identifying three core tasks: egocentric procedural error detection, egocentric procedural learning, and egocentric procedural question answering. These tasks define the essential functions of EgoProceAssist within a new taxonomy. Specifically, our work encompasses a comprehensive review of current techniques, relevant datasets, and evaluation metrics across these three core areas. To clarify the gap between the proposed EgoProceAssist and existing VLM-based AI assistants, we introduce novel experiments and provide a comprehensive evaluation of representative VLM-based methods. Based on these findings and our technical analysis, we discuss the challenges ahead and suggest future research directions. Furthermore, an exhaustive list of this study is publicly available in an active repository that continuously collects the latest work: https://github.com/z1oong/Building-Egocentric-Procedural-AI-Assistant
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13259v1" target="_blank" rel="noopener noreferrer">
                GeoX-Bench：评估大型多模态模型在跨视角地理定位与姿态估计能力上的基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            GeoX-Bench: Benchmarking Cross-View Geo-Localization and Pose Estimation Capabilities of Large Multimodal Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yushuo Zheng, Jiangyong Ying, Huiyu Duan, Chunyi Li, Zicheng Zhang, Jing Liu, Xi...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注地理定位和姿态估计的基准测试，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然涉及多模态模型，但其应用场景（地理定位）与用户行为建模、内容排序等RecSys/Search/Ads核心任务缺乏直接联系，潜在应用价值有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:19:07
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13259v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13259v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large multimodal models (LMMs) have demonstrated remarkable capabilities across a wide range of tasks, however their knowledge and abilities in the cross-view geo-localization and pose estimation domains remain unexplored, despite potential benefits for navigation, autonomous driving, outdoor robotics, \textit{etc}. To bridge this gap, we introduce \textbf{GeoX-Bench}, a comprehensive \underline{Bench}mark designed to explore and evaluate the capabilities of LMMs in \underline{cross}-view \underline{Geo}-localization and pose estimation. Specifically, GeoX-Bench contains 10,859 panoramic-satellite image pairs spanning 128 cities in 49 countries, along with corresponding 755,976 question-answering (QA) pairs. Among these, 42,900 QA pairs are designated for benchmarking, while the remaining are intended to enhance the capabilities of LMMs. Based on GeoX-Bench, we evaluate the capabilities of 25 state-of-the-art LMMs on cross-view geo-localization and pose estimation tasks, and further explore the empowered capabilities of instruction-tuning. Our benchmark demonstrate that while current LMMs achieve impressive performance in geo-localization tasks, their effectiveness declines significantly on the more complex pose estimation tasks, highlighting a critical area for future improvement, and instruction-tuning LMMs on the training data of GeoX-Bench can significantly improve the cross-view geo-sense abilities. The GeoX-Bench is available at \textcolor{magenta}{https://github.com/IntMeGroup/GeoX-Bench}.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13249v1" target="_blank" rel="noopener noreferrer">
                基于多上下文重叠窗口交叉注意力的指代性伪装目标检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Referring Camouflaged Object Detection With Multi-Context Overlapped Windows Cross-Attention
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yu Wen, Shuyong Gao, Shuping Zhang, Miao Huang, Lili Tao, Han Yang, Haozhe Xing,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉中的伪装目标检测任务，虽然涉及注意力机制，但其核心应用领域是视觉场景理解而非推荐系统、搜索或广告。多上下文重叠窗口交叉注意力技术可能对处理视觉内容有一定启发，但与当前关注的文本序列处理、用户行为建模等核心需求相关性较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:08:50
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13249v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13249v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Referring camouflaged object detection (Ref-COD) aims to identify hidden objects by incorporating reference information such as images and text descriptions. Previous research has transformed reference images with salient objects into one-dimensional prompts, yielding significant results. We explore ways to enhance performance through multi-context fusion of rich salient image features and camouflaged object features. Therefore, we propose RFMNet, which utilizes features from multiple encoding stages of the reference salient images and performs interactive fusion with the camouflage features at the corresponding encoding stages. Given that the features in salient object images contain abundant object-related detail information, performing feature fusion within local areas is more beneficial for detecting camouflaged objects. Therefore, we propose an Overlapped Windows Cross-attention mechanism to enable the model to focus more attention on the local information matching based on reference features. Besides, we propose the Referring Feature Aggregation (RFA) module to decode and segment the camouflaged objects progressively. Extensive experiments on the Ref-COD benchmark demonstrate that our method achieves state-of-the-art performance.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13242v1" target="_blank" rel="noopener noreferrer">
                MMD-Thinker：面向多模态虚假信息检测的自适应多维思考
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MMD-Thinker: Adaptive Multi-Dimensional Thinking for Multimodal Misinformation Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junjie Wu, Guohong Fu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态虚假信息检测，这属于内容安全领域而非推荐系统、搜索或广告的核心技术。虽然涉及多模态处理，但其应用场景（虚假信息检测）与用户行为建模、内容排序或广告投放等核心业务需求关联度较低。该技术缺乏明确的在推荐、搜索或广告系统中的直接应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:04:30
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13242v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13242v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multimodal misinformation floods on various social media, and continues to evolve in the era of AI-generated content (AIGC). The emerged misinformation with low creation cost and high deception poses significant threats to society. While recent studies leverage general-purpose multimodal large language models (MLLMs) to achieve remarkable results in detection, they encounter two critical limitations: (1) Insufficient reasoning, where general-purpose MLLMs often follow the uniform reasoning paradigm but generate inaccurate explanations and judgments, due to the lack of the task-specific knowledge of multimodal misinformation detection. (2) Reasoning biases, where a single thinking mode make detectors a suboptimal path for judgment, struggling to keep pace with the fast-growing and intricate multimodal misinformation. In this paper, we propose MMD-Thinker, a two-stage framework for multimodal misinformation detection through adaptive multi-dimensional thinking. First, we develop tailor-designed thinking mode for multimodal misinformation detection. Second, we adopt task-specific instruction tuning to inject the tailored thinking mode into general-purpose MLLMs. Third, we further leverage reinforcement learning strategy with a mixed advantage function, which incentivizes the reasoning capabilities in trajectories. Furthermore, we construct the multimodal misinformation reasoning (MMR) dataset, encompasses more than 8K image-text pairs with both reasoning processes and classification labels, to make progress in the relam of multimodal misinformation detection. Experimental results demonstrate that our proposed MMD-Thinker achieves state-of-the-art performance on both in-domain and out-of-domain benchmark datasets, while maintaining flexible inference and token usage. Code will be publicly available at Github.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13211v1" target="_blank" rel="noopener noreferrer">
                3DAlign-DAER：面向细粒度3D-文本对齐的大规模动态注意力策略与高效检索策略
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            3DAlign-DAER: Dynamic Attention Policy and Efficient Retrieval Strategy for Fine-grained 3D-Text Alignment at Scale
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yijia Fan, Jusheng Zhang, Kaitong Cai, Jing Yang, Jian Wang, Keze Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注3D视觉与文本对齐，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然动态注意力策略和高效检索技术在某些多模态推荐场景中可能有间接应用，但论文明确聚焦于3D视觉这一特定领域，缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:23:29
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13211v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13211v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Despite recent advancements in 3D-text cross-modal alignment, existing state-of-the-art methods still struggle to align fine-grained textual semantics with detailed geometric structures, and their alignment performance degrades significantly when scaling to large-scale 3D databases. To overcome this limitation, we introduce 3DAlign-DAER, a unified framework designed to align text and 3D geometry via the proposed dynamic attention policy and the efficient retrieval strategy, capturing subtle correspondences for diverse cross-modal retrieval and classification tasks. Specifically, during the training, our proposed dynamic attention policy (DAP) employs the Hierarchical Attention Fusion (HAF) module to represent the alignment as learnable fine-grained token-to-point attentions. To optimize these attentions across different tasks and geometric hierarchies, our DAP further exploits the Monte Carlo tree search to dynamically calibrate HAF attention weights via a hybrid reward signal and further enhances the alignment between textual descriptions and local 3D geometry. During the inference, our 3DAlign-DAER introduces an Efficient Retrieval Strategy (ERS) to leverage efficient hierarchical searching in the large-scale embedding spaces, outperforming traditional methods (e.g., KNN) in accuracy and efficiency. Furthermore, to facilitate text-3D alignment research and train our 3DAlign-DAER, we construct Align3D-2M, a large-scale dataset featuring 2M text-3D pairs, to provide sufficient fine-grained cross-modal annotations. Extensive and comprehensive experiments demonstrate the superior performance of our 3DAlign-DAER on diverse benchmarks. We will release our codes, models, and datasets.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13208v1" target="_blank" rel="noopener noreferrer">
                基于姿态感知视频Transformer的端到端多人姿态估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            End-to-End Multi-Person Pose Estimation with Pose-Aware Video Transformer
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yonghui Yu, Jiahang Cai, Xun Wang, Wenwu Yang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的姿态估计任务，属于纯粹的视觉领域研究。虽然Transformer架构在技术上是相关的，但该工作没有展示与推荐系统、搜索或广告的明确联系或潜在应用。姿态估计本身在这些领域缺乏直接的应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:19:35
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13208v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13208v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Existing multi-person video pose estimation methods typically adopt a two-stage pipeline: detecting individuals in each frame, followed by temporal modeling for single-person pose estimation. This design relies on heuristic operations such as detection, RoI cropping, and non-maximum suppression (NMS), limiting both accuracy and efficiency. In this paper, we present a fully end-to-end framework for multi-person 2D pose estimation in videos, effectively eliminating heuristic operations. A key challenge is to associate individuals across frames under complex and overlapping temporal trajectories. To address this, we introduce a novel Pose-Aware Video transformEr Network (PAVE-Net), which features a spatial encoder to model intra-frame relations and a spatiotemporal pose decoder to capture global dependencies across frames. To achieve accurate temporal association, we propose a pose-aware attention mechanism that enables each pose query to selectively aggregate features corresponding to the same individual across consecutive frames.Additionally, we explicitly model spatiotemporal dependencies among pose keypoints to improve accuracy. Notably, our approach is the first end-to-end method for multi-frame 2D human pose estimation.Extensive experiments show that PAVE-Net substantially outperforms prior image-based end-to-end methods, achieving a \textbf{6.0} mAP improvement on PoseTrack2017, and delivers accuracy competitive with state-of-the-art two-stage video-based approaches, while offering significant gains in efficiency.Project page: https://github.com/zgspose/PAVENet
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13207v1" target="_blank" rel="noopener noreferrer">
                PIGEON：基于视觉语言模型的通过兴趣点选择进行物体导航
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PIGEON: VLM-Driven Object Navigation via Points of Interest Selection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Cheng Peng, Zhenzhe Zhang, Cheng Chi, Xiaobao Wei, Yanhao Zhang, Heng Wang, Peng...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉导航任务，属于机器人学和计算机视觉领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然提到了视觉语言模型（VLM），但其应用场景是物理世界的物体导航，而非处理推荐、搜索或广告中的异构数据建模问题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:19:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13207v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13207v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Navigating to a specified object in an unknown environment is a fundamental yet challenging capability of embodied intelligence. However, current methods struggle to balance decision frequency with intelligence, resulting in decisions lacking foresight or discontinuous actions. In this work, we propose PIGEON: Point of Interest Guided Exploration for Object Navigation with VLM, maintaining a lightweight and semantically aligned snapshot memory during exploration as semantic input for the exploration strategy. We use a large Visual-Language Model (VLM), named PIGEON-VL, to select Points of Interest (PoI) formed during exploration and then employ a lower-level planner for action output, increasing the decision frequency. Additionally, this PoI-based decision-making enables the generation of Reinforcement Learning with Verifiable Reward (RLVR) data suitable for simulators. Experiments on classic object navigation benchmarks demonstrate that our zero-shot transfer method achieves state-of-the-art performance, while RLVR further enhances the model's semantic guidance capabilities, enabling deep reasoning during real-time navigation.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13204v1" target="_blank" rel="noopener noreferrer">
                RefineVAD：基于语义引导特征重校准的弱监督视频异常检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            RefineVAD: Semantic-Guided Feature Recalibration for Weakly Supervised Video Anomaly Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junhee Lee, ChaeBeen Bang, MyoungChul Kim, MyeongAh Cho
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的视频异常检测，虽然涉及弱监督学习和特征重校准技术，但其核心应用场景（视频监控）与搜索、推荐或广告系统没有直接关联。论文中提到的语义引导特征处理技术可能对处理多模态数据有启发，但这种潜在应用过于间接且不明确。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:15:34
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13204v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13204v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Weakly-Supervised Video Anomaly Detection aims to identify anomalous events using only video-level labels, balancing annotation efficiency with practical applicability. However, existing methods often oversimplify the anomaly space by treating all abnormal events as a single category, overlooking the diverse semantic and temporal characteristics intrinsic to real-world anomalies. Inspired by how humans perceive anomalies, by jointly interpreting temporal motion patterns and semantic structures underlying different anomaly types, we propose RefineVAD, a novel framework that mimics this dual-process reasoning. Our framework integrates two core modules. The first, Motion-aware Temporal Attention and Recalibration (MoTAR), estimates motion salience and dynamically adjusts temporal focus via shift-based attention and global Transformer-based modeling. The second, Category-Oriented Refinement (CORE), injects soft anomaly category priors into the representation space by aligning segment-level features with learnable category prototypes through cross-attention. By jointly leveraging temporal dynamics and semantic structure, explicitly models both "how" motion evolves and "what" semantic category it resembles. Extensive experiments on WVAD benchmark validate the effectiveness of RefineVAD and highlight the importance of integrating semantic context to guide feature refinement toward anomaly-relevant patterns.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13190v1" target="_blank" rel="noopener noreferrer">
                基于物体中心三维展开的视频空间推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Video Spatial Reasoning with Object-Centric 3D Rollout
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haoran Tang, Meng Cao, Ruyang Liu, Xiaoxi Liang, Linglong Li, Ge Li, Xiaodan Lia...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频理解和3D空间推理，属于计算机视觉领域。虽然提到了物体中心的表示学习，但其核心应用场景是视频分析而非推荐系统、搜索或广告。论文缺乏明确的连接点表明其技术可以应用于处理推荐系统中的异构数据或用户行为序列建模。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:53:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13190v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13190v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advances in Multi-modal Large Language Models (MLLMs) have showcased remarkable capabilities in vision-language understanding. However, enabling robust video spatial reasoning-the ability to comprehend object locations, orientations, and inter-object relationships in dynamic 3D scenes-remains a key unsolved challenge. Existing approaches primarily rely on spatially grounded supervised fine-tuning or reinforcement learning, yet we observe that such models often exhibit query-locked reasoning, focusing narrowly on objects explicitly mentioned in the prompt while ignoring critical contextual cues. To address this limitation, we propose Object-Centric 3D Rollout (OCR), a novel strategy that introduces structured perturbations to the 3D geometry of selected objects during training. By degrading object-specific visual cues and projecting the altered geometry into 2D space, OCR compels the model to reason holistically across the entire scene. We further design a rollout-based training pipeline that jointly leverages vanilla and region-noisy videos to optimize spatial reasoning trajectories. Experiments demonstrate state-of-the-art performance: our 3B-parameter model achieves 47.5% accuracy on VSI-Bench, outperforming several 7B baselines. Ablations confirm OCR's superiority over prior rollout strategies (e.g., T-GRPO, NoisyRollout).
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13131v1" target="_blank" rel="noopener noreferrer">
                MM-Telco：面向电信应用的基准与多模态大语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MM-Telco: Benchmarks and Multimodal Large Language Models for Telecom Applications
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Gagan Raj Gupta, Anshul Kumar, Manish Rai, Apu Chakraborty, Ashutosh Modi, Abdel...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确聚焦于电信领域的特定应用（Telecom Applications），这属于与推荐系统、搜索或广告无关的垂直领域应用。虽然提到了多模态大语言模型技术，但其应用场景限定在电信行业，缺乏与推荐/搜索/广告领域的直接关联或可迁移性。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:34:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13131v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13131v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.ET</span><span class="category-tag">cs.NI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models (LLMs) have emerged as powerful tools for automating complex reasoning and decision-making tasks. In telecommunications, they hold the potential to transform network optimization, automate troubleshooting, enhance customer support, and ensure regulatory compliance. However, their deployment in telecom is hindered by domain-specific challenges that demand specialized adaptation. To overcome these challenges and to accelerate the adaptation of LLMs for telecom, we propose MM-Telco, a comprehensive suite of multimodal benchmarks and models tailored for the telecom domain. The benchmark introduces various tasks (both text based and image based) that address various practical real-life use cases such as network operations, network management, improving documentation quality, and retrieval of relevant text and images. Further, we perform baseline experiments with various LLMs and VLMs. The models fine-tuned on our dataset exhibit a significant boost in performance. Our experiments also help analyze the weak areas in the working of current state-of-art multimodal LLMs, thus guiding towards further development and research.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13121v1" target="_blank" rel="noopener noreferrer">
                CloseUpShot：基于点条件扩散模型的稀疏视图近距离新颖视角合成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CloseUpShot: Close-up Novel View Synthesis from Sparse-views via Point-conditioned Diffusion Model
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuqi Zhang, Guanying Chen, Jiaxing Chen, Chuanyu Fu, Chuan Huang, Shuguang Cui
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的新颖视角合成技术，特别是近距离场景的生成。虽然扩散模型是LLM相关的生成技术，但论文的应用场景（3D视觉、视图合成）与推荐系统、搜索或广告的核心领域没有直接关联。这种技术可能在某些边缘场景中有潜在应用，但缺乏明确的RecSys/Search/Ads相关性。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:20:06
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13121v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13121v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Reconstructing 3D scenes and synthesizing novel views from sparse input views is a highly challenging task. Recent advances in video diffusion models have demonstrated strong temporal reasoning capabilities, making them a promising tool for enhancing reconstruction quality under sparse-view settings. However, existing approaches are primarily designed for modest viewpoint variations, which struggle in capturing fine-grained details in close-up scenarios since input information is severely limited. In this paper, we present a diffusion-based framework, called CloseUpShot, for close-up novel view synthesis from sparse inputs via point-conditioned video diffusion. Specifically, we observe that pixel-warping conditioning suffers from severe sparsity and background leakage in close-up settings. To address this, we propose hierarchical warping and occlusion-aware noise suppression, enhancing the quality and completeness of the conditioning images for the video diffusion model. Furthermore, we introduce global structure guidance, which leverages a dense fused point cloud to provide consistent geometric context to the diffusion process, to compensate for the lack of globally consistent 3D constraints in sparse conditioning inputs. Extensive experiments on multiple datasets demonstrate that our method outperforms existing approaches, especially in close-up novel view synthesis, clearly validating the effectiveness of our design.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13113v1" target="_blank" rel="noopener noreferrer">
                语义与内容至关重要：面向图像去雨的多先验分层Mamba方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Semantics and Content Matter: Towards Multi-Prior Hierarchical Mamba for Image Deraining
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhaocheng Yu, Kui Jiang, Junjun Jiang, Xianming Liu, Guanglu Sun, Yi Xiao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像去雨任务，属于纯粹的图像处理应用。虽然提到了Mamba架构（一种序列建模方法），但其应用场景与推荐系统、搜索或广告没有直接关联。该技术缺乏在异构数据处理或序列建模方面的明显潜力，无法应用于推荐系统、搜索或广告领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:08:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13113v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13113v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Rain significantly degrades the performance of computer vision systems, particularly in applications like autonomous driving and video surveillance. While existing deraining methods have made considerable progress, they often struggle with fidelity of semantic and spatial details. To address these limitations, we propose the Multi-Prior Hierarchical Mamba (MPHM) network for image deraining. This novel architecture synergistically integrates macro-semantic textual priors (CLIP) for task-level semantic guidance and micro-structural visual priors (DINOv2) for scene-aware structural information. To alleviate potential conflicts between heterogeneous priors, we devise a progressive Priors Fusion Injection (PFI) that strategically injects complementary cues at different decoder levels. Meanwhile, we equip the backbone network with an elaborate Hierarchical Mamba Module (HMM) to facilitate robust feature representation, featuring a Fourier-enhanced dual-path design that concurrently addresses global context modeling and local detail recovery. Comprehensive experiments demonstrate MPHM's state-of-the-art performance, achieving a 0.57 dB PSNR gain on the Rain200H dataset while delivering superior generalization on real-world rainy scenarios.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13108v1" target="_blank" rel="noopener noreferrer">
                DGS-Net：用于AI生成图像检测中CLIP微调的蒸馏引导梯度手术
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DGS-Net: Distillation-Guided Gradient Surgery for CLIP Fine-Tuning in AI-Generated Image Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiazhen Yan, Ziqiang Li, Fan Wang, Boyu Wang, Zhangjie Fu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于AI生成图像检测，这是一个与内容生成和真实性验证相关的计算机视觉任务。虽然使用了CLIP模型，但应用领域（图像检测）与推荐系统、搜索或广告的排名和个性化核心问题没有直接关联。该技术缺乏在RecSys/Search/Ads中的明显应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:05:31
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13108v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13108v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The rapid progress of generative models such as GANs and diffusion models has led to the widespread proliferation of AI-generated images, raising concerns about misinformation, privacy violations, and trust erosion in digital media. Although large-scale multimodal models like CLIP offer strong transferable representations for detecting synthetic content, fine-tuning them often induces catastrophic forgetting, which degrades pre-trained priors and limits cross-domain generalization. To address this issue, we propose the Distillation-guided Gradient Surgery Network (DGS-Net), a novel framework that preserves transferable pre-trained priors while suppressing task-irrelevant components. Specifically, we introduce a gradient-space decomposition that separates harmful and beneficial descent directions during optimization. By projecting task gradients onto the orthogonal complement of harmful directions and aligning with beneficial ones distilled from a frozen CLIP encoder, DGS-Net achieves unified optimization of prior preservation and irrelevant suppression. Extensive experiments on 50 generative models demonstrate that our method outperforms state-of-the-art approaches by an average margin of 6.6, achieving superior detection performance and generalization across diverse generation techniques.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13081v1" target="_blank" rel="noopener noreferrer">
                重新思考显著性图：面向认知对齐的解释方法分类与评估框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Rethinking Saliency Maps: A Cognitive Human Aligned Taxonomy and Evaluation Framework for Explanations
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yehonatan Elisha, Seffi Cohen, Oren Barkan, Noam Koenigstein
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注模型解释性和人类认知对齐的评估框架，属于模型可解释性范畴。虽然可解释性在推荐/搜索系统中具有一定价值，但该工作更偏向通用的模型解释方法研究，与当前关注的推荐系统核心算法进展、LLM技术应用、Transformer架构创新等焦点领域关联度较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:29:25
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13081v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13081v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Saliency maps are widely used for visual explanations in deep learning, but a fundamental lack of consensus persists regarding their intended purpose and alignment with diverse user queries. This ambiguity hinders the effective evaluation and practical utility of explanation methods.We address this gap by introducing the Reference-Frame $\times$ Granularity (RFxG) taxonomy, a principled conceptual framework that organizes saliency explanations along two essential axes:Reference-Frame: Distinguishing between pointwise ("Why this prediction?") and contrastive ("Why this and not an alternative?") explanations.Granularity: Ranging from fine-grained class-level (e.g., "Why Husky?") to coarse-grained group-level (e.g., "Why Dog?") interpretations.Using the RFxG lens, we demonstrate critical limitations in existing evaluation metrics, which overwhelmingly prioritize pointwise faithfulness while neglecting contrastive reasoning and semantic granularity. To systematically assess explanation quality across both RFxG dimensions, we propose four novel faithfulness metrics. Our comprehensive evaluation framework applies these metrics to ten state-of-the-art saliency methods, four model architectures, and three datasets.By advocating a shift toward user-intent-driven evaluation, our work provides both the conceptual foundation and the practical tools necessary to develop visual explanations that are not only faithful to the underlying model behavior but are also meaningfully aligned with the complexity of human understanding and inquiry.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13389v1" target="_blank" rel="noopener noreferrer">
                通过时间序列因果推断揭示铸造工业过程能源效率的因果驱动因素
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Uncovering Causal Drivers of Energy Efficiency for Industrial Process in Foundry via Time-Series Causal Inference
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhipeng Ma, Bo Nørregaard Jørgensen, Zheng Grace Ma
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于工业铸造过程的能源效率优化，属于特定工业领域的应用研究。论文内容涉及时间序列因果推断，但应用场景与推荐系统、搜索或广告完全无关。没有发现任何与LLM技术、Transformer架构或推荐系统核心进展相关的潜在应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:00:00
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13389v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13389v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Improving energy efficiency in industrial foundry processes is a critical challenge, as these operations are highly energy-intensive and marked by complex interdependencies among process variables. Correlation-based analyses often fail to distinguish true causal drivers from spurious associations, limiting their usefulness for decision-making. This paper applies a time-series causal inference framework to identify the operational factors that directly affect energy efficiency in induction furnace melting. Using production data from a Danish foundry, the study integrates time-series clustering to segment melting cycles into distinct operational modes with the PCMCI+ algorithm, a state-of-the-art causal discovery method, to uncover cause-effect relationships within each mode. Across clusters, robust causal relations among energy consumption, furnace temperature, and material weight define the core drivers of efficiency, while voltage consistently influences cooling water temperature with a delayed response. Cluster-specific differences further distinguish operational regimes: efficient clusters are characterized by stable causal structures, whereas inefficient ones exhibit reinforcing feedback loops and atypical dependencies. The contributions of this study are twofold. First, it introduces an integrated clustering-causal inference pipeline as a methodological innovation for analyzing energy-intensive processes. Second, it provides actionable insights that enable foundry operators to optimize performance, reduce energy consumption, and lower emissions.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13357v1" target="_blank" rel="noopener noreferrer">
                FLOWER：面向流程的实体关系工具
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FLOWER: Flow-Oriented Entity-Relationship Tool
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Dmitry Moskalev
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明这是一个实体关系工具，专注于流程建模，这与推荐系统、搜索或广告的核心技术领域无关。没有证据表明该工具涉及LLM、Transformer架构或异构数据建模，也不属于RecSys/Search/Ads的核心进展范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:23:24
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13357v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13357v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.SE</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Exploring relationships across data sources is a crucial optimization for entities recognition. Since databases can store big amount of information with synthetic and organic data, serving all quantity of objects correctly is an important task to deal with. However, the decision of how to construct entity relationship model is associated with human factor. In this paper, we present flow-oriented entity-relationship tool. This is first and unique end-to-end solution that eliminates routine and resource-intensive problems of processing, creating and visualizing both of explicit and implicit dependencies for prominent SQL dialects on-the-fly. Once launched, FLOWER automatically detects built-in constraints and starting to create own correct and necessary one using dynamic sampling and robust data analysis techniques. This approach applies to improve entity-relationship model and data storytelling to better understand the foundation of data and get unseen insights from DB sources using SQL or natural language. Evaluated on state-of-the-art STATS benchmark, experiments show that FLOWER is superior to reservoir sampling by 2.4x for distribution representation and 2.6x for constraint learning with 2.15x acceleration. For data storytelling, our tool archives 1.19x for accuracy enhance with 1.86x context decrease compare to LLM. Presented tool is also support 23 languages and compatible with both of CPU and GPU. Those results show that FLOWER can manage with real-world data a way better to ensure with quality, scalability and applicability for different use-cases.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13271v1" target="_blank" rel="noopener noreferrer">
                考察生成式AI模型在软件编程学生学习活动中的使用情况
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Examining the Usage of Generative AI Models in Student Learning Activities for Software Programming
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Rufeng Chen, Shuaishuai Jiang, Jiyun Shen, AJung Moon, Lili Wei
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于教育领域中学生使用生成式AI学习编程的应用场景，属于纯粹的AIGC和教育应用主题。论文内容与推荐系统、搜索、广告的核心技术进展或使能技术无关，也没有涉及Transformer架构改进或异构数据统一建模等关键技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:42:24
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13271v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13271v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.SE</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The rise of Generative AI (GenAI) tools like ChatGPT has created new opportunities and challenges for computing education. Existing research has primarily focused on GenAI's ability to complete educational tasks and its impact on student performance, often overlooking its effects on knowledge gains. In this study, we investigate how GenAI assistance compares to conventional online resources in supporting knowledge gains across different proficiency levels. We conducted a controlled user experiment with 24 undergraduate students of two different levels of programming experience (beginner, intermediate) to examine how students interact with ChatGPT while solving programming tasks. We analyzed task performance, conceptual understanding, and interaction behaviors. Our findings reveal that generating complete solutions with GenAI significantly improves task performance, especially for beginners, but does not consistently result in knowledge gains. Importantly, usage strategies differ by experience: beginners tend to rely heavily on GenAI toward task completion often without knowledge gain in the process, while intermediates adopt more selective approaches. We find that both over-reliance and minimal use result in weaker knowledge gains overall. Based on our results, we call on students and educators to adopt GenAI as a learning rather than a problem solving tool. Our study highlights the urgent need for guidance when integrating GenAI into programming education to foster deeper understanding.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13063v1" target="_blank" rel="noopener noreferrer">
                FGNet：利用特征引导注意力优化SAM2用于3D电子显微镜神经元分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FGNet: Leveraging Feature-Guided Attention to Refine SAM2 for 3D EM Neuron Segmentation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhenghua Li, Hang Chen, Zihao Sun, Kai Li, Xiaolin Hu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D电子显微镜下的神经元分割，属于计算机视觉中的生物医学图像分析领域。虽然提到了注意力机制，但其应用场景（3D EM神经元分割）与推荐系统、搜索或广告的核心技术需求完全无关，且没有显示出任何在异构数据处理或推荐相关任务中的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:11:07
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13063v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13063v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Accurate segmentation of neural structures in Electron Microscopy (EM) images is paramount for neuroscience. However, this task is challenged by intricate morphologies, low signal-to-noise ratios, and scarce annotations, limiting the accuracy and generalization of existing methods. To address these challenges, we seek to leverage the priors learned by visual foundation models on a vast amount of natural images to better tackle this task. Specifically, we propose a novel framework that can effectively transfer knowledge from Segment Anything 2 (SAM2), which is pre-trained on natural images, to the EM domain. We first use SAM2 to extract powerful, general-purpose features. To bridge the domain gap, we introduce a Feature-Guided Attention module that leverages semantic cues from SAM2 to guide a lightweight encoder, the Fine-Grained Encoder (FGE), in focusing on these challenging regions. Finally, a dual-affinity decoder generates both coarse and refined affinity maps. Experimental results demonstrate that our method achieves performance comparable to state-of-the-art (SOTA) approaches with the SAM2 weights frozen. Upon further fine-tuning on EM data, our method significantly outperforms existing SOTA methods. This study validates that transferring representations pre-trained on natural images, when combined with targeted domain-adaptive guidance, can effectively address the specific challenges in neuron segmentation.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12959v1" target="_blank" rel="noopener noreferrer">
                基于知识引导的个性化联邦推荐
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Personalized Federated Recommendation With Knowledge Guidance
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jaehyung Lim, Wonbin Kweon, Woojoo Kim, Junyoung Kim, Dongha Kim, Hwanjo Yu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及联邦学习（Federated Learning），这属于明确列出的无关主题。虽然标题包含推荐系统，但联邦学习的核心关注点使其与当前聚焦的技术方向不相关。该论文主要解决隐私和分布式学习问题，而非核心推荐算法或LLM技术进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 04:35:53
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12959v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12959v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Federated Recommendation (FedRec) has emerged as a key paradigm for building privacy-preserving recommender systems. However, existing FedRec models face a critical dilemma: memory-efficient single-knowledge models suffer from a suboptimal knowledge replacement practice that discards valuable personalization, while high-performance dual-knowledge models are often too memory-intensive for practical on-device deployment. We propose Federated Recommendation with Knowledge Guidance (FedRKG), a model-agnostic framework that resolves this dilemma. The core principle, Knowledge Guidance, avoids full replacement and instead fuses global knowledge into preserved local embeddings, attaining the personalization benefits of dual-knowledge within a single-knowledge memory footprint. Furthermore, we introduce Adaptive Guidance, a fine-grained mechanism that dynamically modulates the intensity of this guidance for each user-item interaction, overcoming the limitations of static fusion methods. Extensive experiments on benchmark datasets demonstrate that FedRKG significantly outperforms state-of-the-art methods, validating the effectiveness of our approach. The code is available at https://github.com/Jaehyung-Lim/fedrkg.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13703v1" target="_blank" rel="noopener noreferrer">
                通用基础模型在医院运营场景中临床适用性不足
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Generalist Foundation Models Are Not Clinical Enough for Hospital Operations
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lavender Y. Jiang, Angelica Chen, Xu Han, Xujin Chris Liu, Radhika Dua, Kevin Ea...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文明确聚焦于医院运营和临床应用的特定领域，这属于医疗领域的具体应用，与推荐系统、搜索或广告的核心技术进展无关。论文讨论的是基础模型在医疗运营中的局限性，不涉及推荐、搜索或广告领域的技术创新或应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:52:22
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13703v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13703v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Hospitals and healthcare systems rely on operational decisions that determine patient flow, cost, and quality of care. Despite strong performance on medical knowledge and conversational benchmarks, foundation models trained on general text may lack the specialized knowledge required for these operational decisions. We introduce Lang1, a family of models (100M-7B parameters) pretrained on a specialized corpus blending 80B clinical tokens from NYU Langone Health's EHRs and 627B tokens from the internet. To rigorously evaluate Lang1 in real-world settings, we developed the REalistic Medical Evaluation (ReMedE), a benchmark derived from 668,331 EHR notes that evaluates five critical tasks: 30-day readmission prediction, 30-day mortality prediction, length of stay, comorbidity coding, and predicting insurance claims denial. In zero-shot settings, both general-purpose and specialized models underperform on four of five tasks (36.6%-71.7% AUROC), with mortality prediction being an exception. After finetuning, Lang1-1B outperforms finetuned generalist models up to 70x larger and zero-shot models up to 671x larger, improving AUROC by 3.64%-6.75% and 1.66%-23.66% respectively. We also observed cross-task scaling with joint finetuning on multiple tasks leading to improvement on other tasks. Lang1-1B effectively transfers to out-of-distribution settings, including other clinical tasks and an external health system. Our findings suggest that predictive capabilities for hospital operations require explicit supervised finetuning, and that this finetuning process is made more efficient by in-domain pretraining on EHR. Our findings support the emerging view that specialized LLMs can compete with generalist models in specialized tasks, and show that effective healthcare systems AI requires the combination of in-domain pretraining, supervised finetuning, and real-world evaluation beyond proxy benchmarks.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13689v1" target="_blank" rel="noopener noreferrer">
                跨越边界：面向印度诗歌翻译与图像生成的多模态挑战
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Crossing Borders: A Multimodal Challenge for Indian Poetry Translation and Image Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sofia Jamil, Kotla Sai Charan, Sriparna Saha, Koustava Goswami, Joseph K J
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于诗歌翻译和图像生成，属于纯粹的AIGC和内容生成领域，与推荐系统、搜索或广告的核心技术无关。虽然涉及多模态概念，但其应用场景（印度诗歌）和任务目标（翻译与生成）与RecSys/Search/Ads的技术需求没有直接关联，且属于明确排除的无关主题范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:41:16
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13689v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13689v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Indian poetry, known for its linguistic complexity and deep cultural resonance, has a rich and varied heritage spanning thousands of years. However, its layered meanings, cultural allusions, and sophisticated grammatical constructions often pose challenges for comprehension, especially for non-native speakers or readers unfamiliar with its context and language. Despite its cultural significance, existing works on poetry have largely overlooked Indian language poems. In this paper, we propose the Translation and Image Generation (TAI) framework, leveraging Large Language Models (LLMs) and Latent Diffusion Models through appropriate prompt tuning. Our framework supports the United Nations Sustainable Development Goals of Quality Education (SDG 4) and Reduced Inequalities (SDG 10) by enhancing the accessibility of culturally rich Indian-language poetry to a global audience. It includes (1) a translation module that uses an Odds Ratio Preference Alignment Algorithm to accurately translate morphologically rich poetry into English, and (2) an image generation module that employs a semantic graph to capture tokens, dependencies, and semantic relationships between metaphors and their meanings, to create visually meaningful representations of Indian poems. Our comprehensive experimental evaluation, including both human and quantitative assessments, demonstrates the superiority of TAI Diffusion in poem image generation tasks, outperforming strong baselines. To further address the scarcity of resources for Indian-language poetry, we introduce the Morphologically Rich Indian Language Poems MorphoVerse Dataset, comprising 1,570 poems across 21 low-resource Indian languages. By addressing the gap in poetry translation and visual comprehension, this work aims to broaden accessibility and enrich the reader's experience.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13646v1" target="_blank" rel="noopener noreferrer">
                Live-SWE-agent：软件工程代理能否在运行中自我进化？
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Live-SWE-agent: Can Software Engineering Agents Self-Evolve on the Fly?
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chunqiu Steven Xia, Zhe Wang, Yan Yang, Yuxiang Wei, Lingming Zhang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注软件工程领域的智能代理和自我进化能力，与推荐系统、搜索或广告的核心技术领域完全无关。论文标题表明其专注于软件开发过程的自动化，没有任何技术要素或潜在应用与我的关注领域相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:58:18
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13646v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13646v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.SE</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models (LLMs) are reshaping almost all industries, including software engineering. In recent years, a number of LLM agents have been proposed to solve real-world software problems. Such software agents are typically equipped with a suite of coding tools and can autonomously decide the next actions to form complete trajectories to solve end-to-end software tasks. While promising, they typically require dedicated design and may still be suboptimal, since it can be extremely challenging and costly to exhaust the entire agent scaffold design space. Recognizing that software agents are inherently software themselves that can be further refined/modified, researchers have proposed a number of self-improving software agents recently, including the Darwin-Gödel Machine (DGM). Meanwhile, such self-improving agents require costly offline training on specific benchmarks and may not generalize well across different LLMs or benchmarks. In this paper, we propose Live-SWE-agent, the first live software agent that can autonomously and continuously evolve itself on-the-fly during runtime when solving real-world software problems. More specifically, Live-SWE-agent starts with the most basic agent scaffold with only access to bash tools (e.g., mini-SWE-agent), and autonomously evolves its own scaffold implementation while solving real-world software problems. Our evaluation on the widely studied SWE-bench Verified benchmark shows that Live-SWE-agent can achieve an impressive solve rate of 75.4% without test-time scaling, outperforming all existing open-source software agents and approaching the performance of the best proprietary solution. Moreover, Live-SWE-agent outperforms state-of-the-art manually crafted software agents on the recent SWE-Bench Pro benchmark, achieving the best-known solve rate of 45.8%.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13612v1" target="_blank" rel="noopener noreferrer">
                使用强化学习掌握物理奥林匹克竞赛
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            P1: Mastering Physics Olympiads with Reinforcement Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiacheng Chen, Qianjia Cheng, Fangchen Yu, Haiyuan Wan, Yuchen Zhang, Shenghe Zh...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于使用强化学习解决物理奥林匹克竞赛问题，这属于物理学领域的特定应用。虽然提到了强化学习，但论文内容与推荐系统、搜索或广告领域没有明显关联，也不涉及LLM技术或Transformer架构的进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:18:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13612v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13612v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent progress in large language models (LLMs) has moved the frontier from puzzle-solving to science-grade reasoning-the kind needed to tackle problems whose answers must stand against nature, not merely fit a rubric. Physics is the sharpest test of this shift, which binds symbols to reality in a fundamental way, serving as the cornerstone of most modern technologies. In this work, we manage to advance physics research by developing large language models with exceptional physics reasoning capabilities, especially excel at solving Olympiad-level physics problems. We introduce P1, a family of open-source physics reasoning models trained entirely through reinforcement learning (RL). Among them, P1-235B-A22B is the first open-source model with Gold-medal performance at the latest International Physics Olympiad (IPhO 2025), and wins 12 gold medals out of 13 international/regional physics competitions in 2024/2025. P1-30B-A3B also surpasses almost all other open-source models on IPhO 2025, getting a silver medal. Further equipped with an agentic framework PhysicsMinions, P1-235B-A22B+PhysicsMinions achieves overall No.1 on IPhO 2025, and obtains the highest average score over the 13 physics competitions. Besides physics, P1 models also present great performance on other reasoning tasks like math and coding, showing the great generalibility of P1 series.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13548v1" target="_blank" rel="noopener noreferrer">
                ForgeDAN：一种用于越狱对齐大型语言模型的进化框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ForgeDAN: An Evolutionary Framework for Jailbreaking Aligned Large Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Siyang Cheng, Gaotian Liu, Rui Mei, Yilin Wang, Kejia Zhang, Kaishuo Wei, Yuqi Y...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于LLM安全性和对抗性攻击（越狱），这属于安全相关主题，在您的无关主题列表中明确排除。尽管涉及LLM技术，但论文关注的是破坏模型对齐而非在搜索、推荐或广告中的应用，因此与您的核心焦点无关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:19:21
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13548v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13548v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The rapid adoption of large language models (LLMs) has brought both transformative applications and new security risks, including jailbreak attacks that bypass alignment safeguards to elicit harmful outputs. Existing automated jailbreak generation approaches e.g. AutoDAN, suffer from limited mutation diversity, shallow fitness evaluation, and fragile keyword-based detection. To address these limitations, we propose ForgeDAN, a novel evolutionary framework for generating semantically coherent and highly effective adversarial prompts against aligned LLMs. First, ForgeDAN introduces multi-strategy textual perturbations across \textit{character, word, and sentence-level} operations to enhance attack diversity; then we employ interpretable semantic fitness evaluation based on a text similarity model to guide the evolutionary process toward semantically relevant and harmful outputs; finally, ForgeDAN integrates dual-dimensional jailbreak judgment, leveraging an LLM-based classifier to jointly assess model compliance and output harmfulness, thereby reducing false positives and improving detection effectiveness. Our evaluation demonstrates ForgeDAN achieves high jailbreaking success rates while maintaining naturalness and stealth, outperforming existing SOTA solutions.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13529v1" target="_blank" rel="noopener noreferrer">
                迈向匈牙利语对话语音识别：引入BEA-Large和BEA-Dialogue数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Toward Conversational Hungarian Speech Recognition: Introducing the BEA-Large and BEA-Dialogue Datasets
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Máté Gedeon, Piroska Zsófia Barta, Péter Mihajlik, Tekla Etelka Gráczi, Anna Koh...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于匈牙利语语音识别数据集创建，属于纯语音技术领域。语音识别本身与推荐系统、搜索或广告的核心技术栈没有直接关联，且论文未提及任何在推荐/搜索/广告领域的潜在应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:02:08
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13529v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13529v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.SD</span><span class="category-tag">eess.AS</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The advancement of automatic speech recognition (ASR) has been largely enhanced by extensive datasets in high-resource languages, while languages such as Hungarian remain underrepresented due to limited spontaneous and conversational corpora. To address this gap, we introduce two new datasets -- BEA-Large and BEA-Dialogue -- constructed from the previously unprocessed portions of the Hungarian speech corpus named BEA. BEA-Large extends BEA-Base with 255 hours of spontaneous speech from 433 speakers, enriched with detailed segment-level metadata. BEA-Dialogue, comprising 85 hours of spontaneous conversations, is a Hungarian speech corpus featuring natural dialogues partitioned into speaker-independent subsets, supporting research in conversational ASR and speaker diarization. We establish reproducible baselines on these datasets using publicly available ASR models, with the fine-tuned Fast Conformer model achieving word error rates as low as 14.18\% on spontaneous and 4.8\% on repeated speech. Diarization experiments yield diarization error rates between 13.05\% and 18.26\%, providing reference points for future improvements. The results highlight the persistent difficulty of conversational ASR, particularly due to disfluencies, overlaps, and informal speech patterns. By releasing these datasets and baselines, we aim to advance Hungarian speech technology and offer a methodological framework for developing spontaneous and conversational benchmarks in other languages.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13481v1" target="_blank" rel="noopener noreferrer">
                泰语财务披露中的方面级混淆情感及其对异常收益的影响
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Aspect-Level Obfuscated Sentiment in Thai Financial Disclosures and Its Impact on Abnormal Returns
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Attapol T. Rutherford, Sirisak Chueykamhang, Thachaparn Bunditlurdruk, Nanthicha...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于特定语言（泰语）的金融领域情感分析，属于领域特定的NLP应用。虽然涉及文本分析，但与推荐系统、搜索或广告的核心技术进展、LLM技术或Transformer架构改进没有直接关联。论文关注的是金融披露中的情感混淆和异常收益，属于金融NLP应用而非通用的推荐/搜索技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:17:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13481v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13481v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Understanding sentiment in financial documents is crucial for gaining insights into market behavior. These reports often contain obfuscated language designed to present a positive or neutral outlook, even when underlying conditions may be less favorable. This paper presents a novel approach using Aspect-Based Sentiment Analysis (ABSA) to decode obfuscated sentiment in Thai financial annual reports. We develop specific guidelines for annotating obfuscated sentiment in these texts and annotate more than one hundred financial reports. We then benchmark various text classification models on this annotated dataset, demonstrating strong performance in sentiment classification. Additionally, we conduct an event study to evaluate the real-world implications of our sentiment analysis on stock prices. Our results suggest that market reactions are selectively influenced by specific aspects within the reports. Our findings underscore the complexity of sentiment analysis in financial texts and highlight the importance of addressing obfuscated language to accurately assess market sentiment.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13467v1" target="_blank" rel="noopener noreferrer">
                用于翻译质量评估的非线性评分模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Non-Linear Scoring Model for Translation Quality Evaluation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Serge Gladkoff, Lifeng Han, Katerina Gasova
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器翻译领域的质量评估，属于纯粹的NLP应用场景。虽然提到了评分模型，但这是针对翻译质量的特定评估，与推荐系统、搜索或广告的核心技术没有直接关联，也没有明显的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:09:22
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13467v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13467v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Analytic Translation Quality Evaluation (TQE), based on Multidimensional Quality Metrics (MQM), traditionally uses a linear error-to-penalty scale calibrated to a reference sample of 1000-2000 words. However, linear extrapolation biases judgment on samples of different sizes, over-penalizing short samples and under-penalizing long ones, producing misalignment with expert intuition. Building on the Multi-Range framework, this paper presents a calibrated, non-linear scoring model that better reflects how human content consumers perceive translation quality across samples of varying length. Empirical data from three large-scale enterprise environments shows that acceptable error counts grow logarithmically, not linearly, with sample size. Psychophysical and cognitive evidence, including the Weber-Fechner law and Cognitive Load Theory, supports this premise by explaining why the perceptual impact of additional errors diminishes while the cognitive burden grows with scale. We propose a two-parameter model E(x) = a * ln(1 + b * x), a, b > 0, anchored to a reference tolerance and calibrated from two tolerance points using a one-dimensional root-finding step. The model yields an explicit interval within which the linear approximation stays within +/-20 percent relative error and integrates into existing evaluation workflows with only a dynamic tolerance function added. The approach improves interpretability, fairness, and inter-rater reliability across both human and AI-generated translations. By operationalizing a perceptually valid scoring paradigm, it advances translation quality evaluation toward more accurate and scalable assessment. The model also provides a stronger basis for AI-based document-level evaluation aligned with human judgment. Implementation considerations for CAT/LQA systems and implications for human and AI-generated text evaluation are discussed.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13381v1" target="_blank" rel="noopener noreferrer">
                大型语言模型能否胜任合格儿科医生？在真实世界临床环境中的系统性评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Can Large Language Models Function as Qualified Pediatricians? A Systematic Evaluation in Real-World Clinical Contexts
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Siyu Zhu, Mouxiao Bian, Yue Xie, Yongyu Tang, Zhikang Yu, Tianbin Li, Pengcheng ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的LLM应用评估，具体针对儿科临床实践。这属于明确的无关主题范畴（医学/生物学领域特定应用），与推荐系统、搜索或广告技术没有任何关联。论文内容纯粹是医疗领域的LLM能力评估，没有任何技术可以应用于RecSys/Search/Ads领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:54:00
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13381v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13381v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the rapid rise of large language models (LLMs) in medicine, a key question is whether they can function as competent pediatricians in real-world clinical settings. We developed PEDIASBench, a systematic evaluation framework centered on a knowledge-system framework and tailored to realistic clinical environments. PEDIASBench assesses LLMs across three dimensions: application of basic knowledge, dynamic diagnosis and treatment capability, and pediatric medical safety and medical ethics. We evaluated 12 representative models released over the past two years, including GPT-4o, Qwen3-235B-A22B, and DeepSeek-V3, covering 19 pediatric subspecialties and 211 prototypical diseases. State-of-the-art models performed well on foundational knowledge, with Qwen3-235B-A22B achieving over 90% accuracy on licensing-level questions, but performance declined ~15% as task complexity increased, revealing limitations in complex reasoning. Multiple-choice assessments highlighted weaknesses in integrative reasoning and knowledge recall. In dynamic diagnosis and treatment scenarios, DeepSeek-R1 scored highest in case reasoning (mean 0.58), yet most models struggled to adapt to real-time patient changes. On pediatric medical ethics and safety tasks, Qwen2.5-72B performed best (accuracy 92.05%), though humanistic sensitivity remained limited. These findings indicate that pediatric LLMs are constrained by limited dynamic decision-making and underdeveloped humanistic care. Future development should focus on multimodal integration and a clinical feedback-model iteration loop to enhance safety, interpretability, and human-AI collaboration. While current LLMs cannot independently perform pediatric care, they hold promise for decision support, medical education, and patient communication, laying the groundwork for a safe, trustworthy, and collaborative intelligent pediatric healthcare system.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13335v1" target="_blank" rel="noopener noreferrer">
                AHaSIS：阿拉伯语方言情感分析共享任务
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AHaSIS: Shared Task on Sentiment Analysis for Arabic Dialects
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Maram Alharbi, Salmane Chafik, Saad Ezzini, Ruslan Mitkov, Tharindu Ranasinghe, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于阿拉伯语方言的情感分析共享任务，属于特定语言的情感分析研究。这与我的核心关注领域（推荐系统、搜索、广告）以及相关的LLM技术、Transformer架构进展或异构数据建模没有直接关联。情感分析虽然是NLP的一个子领域，但该论文的语言特定性和任务导向性质使其在推荐/搜索/广告应用中的潜在价值非常有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:06:55
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13335v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13335v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The hospitality industry in the Arab world increasingly relies on customer feedback to shape services, driving the need for advanced Arabic sentiment analysis tools. To address this challenge, the Sentiment Analysis on Arabic Dialects in the Hospitality Domain shared task focuses on Sentiment Detection in Arabic Dialects. This task leverages a multi-dialect, manually curated dataset derived from hotel reviews originally written in Modern Standard Arabic (MSA) and translated into Saudi and Moroccan (Darija) dialects. The dataset consists of 538 sentiment-balanced reviews spanning positive, neutral, and negative categories. Translations were validated by native speakers to ensure dialectal accuracy and sentiment preservation. This resource supports the development of dialect-aware NLP systems for real-world applications in customer experience analysis. More than 40 teams have registered for the shared task, with 12 submitting systems during the evaluation phase. The top-performing system achieved an F1 score of 0.81, demonstrating the feasibility and ongoing challenges of sentiment analysis across Arabic dialects.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13333v1" target="_blank" rel="noopener noreferrer">
                AutoMalDesc：面向网络威胁研究的大规模脚本分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AutoMalDesc: Large-Scale Script Analysis for Cyber Threat Research
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Alexandru-Mihai Apostu, Andrei Preda, Alexandra Daniela Damir, Diana Bolocan, Ra...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于网络安全领域的恶意脚本分析，属于网络安全技术范畴。这与我的核心关注点（推荐系统、搜索、广告以及相关的LLM和Transformer技术）完全无关，网络安全属于明确的无关主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:05:25
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13333v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13333v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Generating thorough natural language explanations for threat detections remains an open problem in cybersecurity research, despite significant advances in automated malware detection systems. In this work, we present AutoMalDesc, an automated static analysis summarization framework that, following initial training on a small set of expert-curated examples, operates independently at scale. This approach leverages an iterative self-paced learning pipeline to progressively enhance output quality through synthetic data generation and validation cycles, eliminating the need for extensive manual data annotation. Evaluation across 3,600 diverse samples in five scripting languages demonstrates statistically significant improvements between iterations, showing consistent gains in both summary quality and classification accuracy. Our comprehensive validation approach combines quantitative metrics based on established malware labels with qualitative assessment from both human experts and LLM-based judges, confirming both technical precision and linguistic coherence of generated summaries. To facilitate reproducibility and advance research in this domain, we publish our complete dataset of more than 100K script samples, including annotated seed (0.9K) and test (3.6K) datasets, along with our methodology and evaluation framework.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13329v1" target="_blank" rel="noopener noreferrer">
                RegionMarker：一种用于嵌入即服务版权保护的区域触发式语义水印框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            RegionMarker: A Region-Triggered Semantic Watermarking Framework for Embedding-as-a-Service Copyright Protection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shufan Yang, Zifeng Cheng, Zhiwei Jiang, Yafeng Yin, Cong Wang, Shiping Ge, Yuch...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于数字水印和版权保护技术，这属于安全/隐私领域，被明确列为无关主题。虽然嵌入即服务可能涉及推荐系统中的嵌入技术，但论文的核心焦点是版权保护而非推荐算法本身。该工作没有提供任何在推荐、搜索或广告系统中应用的潜力，完全偏离了当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:04:36
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13329v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13329v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Embedding-as-a-Service (EaaS) is an effective and convenient deployment solution for addressing various NLP tasks. Nevertheless, recent research has shown that EaaS is vulnerable to model extraction attacks, which could lead to significant economic losses for model providers. For copyright protection, existing methods inject watermark embeddings into text embeddings and use them to detect copyright infringement. However, current watermarking methods often resist only a subset of attacks and fail to provide \textit{comprehensive} protection. To this end, we present the region-triggered semantic watermarking framework called RegionMarker, which defines trigger regions within a low-dimensional space and injects watermarks into text embeddings associated with these regions. By utilizing a secret dimensionality reduction matrix to project onto this subspace and randomly selecting trigger regions, RegionMarker makes it difficult for watermark removal attacks to evade detection. Furthermore, by embedding watermarks across the entire trigger region and using the text embedding as the watermark, RegionMarker is resilient to both paraphrasing and dimension-perturbation attacks. Extensive experiments on various datasets show that RegionMarker is effective in resisting different attack methods, thereby protecting the copyright of EaaS.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13290v1" target="_blank" rel="noopener noreferrer">
                置信度中的随机失活：人类-大语言模型对齐中的道德不确定性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Dropouts in Confidence: Moral Uncertainty in Human-LLM Alignment
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jea Kwon, Luiz Felipe Vecchietti, Sungwon Park, Meeyoung Cha
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确涉及人类-LLM对齐中的道德不确定性，这属于伦理和价值观对齐范畴，被明确列为不相关主题。论文内容与推荐系统、搜索或广告中的核心技术进步、使能技术或直接应用完全无关，没有任何潜在的应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:13:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13290v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13290v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CY</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Humans display significant uncertainty when confronted with moral dilemmas, yet the extent of such uncertainty in machines and AI agents remains underexplored. Recent studies have confirmed the overly confident tendencies of machine-generated responses, particularly in large language models (LLMs). As these systems are increasingly embedded in ethical decision-making scenarios, it is important to understand their moral reasoning and the inherent uncertainties in building reliable AI systems. This work examines how uncertainty influences moral decisions in the classical trolley problem, analyzing responses from 32 open-source models and 9 distinct moral dimensions. We first find that variance in model confidence is greater across models than within moral dimensions, suggesting that moral uncertainty is predominantly shaped by model architecture and training method. To quantify uncertainty, we measure binary entropy as a linear combination of total entropy, conditional entropy, and mutual information. To examine its effects, we introduce stochasticity into models via "dropout" at inference time. Our findings show that our mechanism increases total entropy, mainly through a rise in mutual information, while conditional entropy remains largely unchanged. Moreover, this mechanism significantly improves human-LLM moral alignment, with correlations in mutual information and alignment score shifts. Our results highlight the potential to better align model-generated decisions and human preferences by deliberately modulating uncertainty and reducing LLMs' confidence in morally complex scenarios.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13238v1" target="_blank" rel="noopener noreferrer">
                政治立场计算测量：基于文本的理想点估计算法综述
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Computational Measurement of Political Positions: A Review of Text-Based Ideal Point Estimation Algorithms
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Patrick Parschan, Charlott Jakob
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于政治立场的文本分析和理想点估计，属于政治学和计算社会科学领域。虽然涉及文本分析技术，但其应用场景（政治立场测量）与推荐系统、搜索或广告的核心技术需求没有直接关联，也不涉及LLM、Transformer架构或异构数据建模等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:01:09
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13238v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13238v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CY</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This article presents the first systematic review of unsupervised and semi-supervised computational text-based ideal point estimation (CT-IPE) algorithms, methods designed to infer latent political positions from textual data. These algorithms are widely used in political science, communication, computational social science, and computer science to estimate ideological preferences from parliamentary speeches, party manifestos, and social media. Over the past two decades, their development has closely followed broader NLP trends -- beginning with word-frequency models and most recently turning to large language models (LLMs). While this trajectory has greatly expanded the methodological toolkit, it has also produced a fragmented field that lacks systematic comparison and clear guidance for applied use. To address this gap, we identified 25 CT-IPE algorithms through a systematic literature review and conducted a manual content analysis of their modeling assumptions and development contexts. To compare them meaningfully, we introduce a conceptual framework that distinguishes how algorithms generate, capture, and aggregate textual variance. On this basis, we identify four methodological families -- word-frequency, topic modeling, word embedding, and LLM-based approaches -- and critically assess their assumptions, interpretability, scalability, and limitations. Our review offers three contributions. First, it provides a structured synthesis of two decades of algorithm development, clarifying how diverse methods relate to one another. Second, it translates these insights into practical guidance for applied researchers, highlighting trade-offs in transparency, technical requirements, and validation strategies that shape algorithm choice. Third, it emphasizes that differences in estimation outcomes across algorithms are themselves informative, underscoring the need for systematic benchmarking.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13182v1" target="_blank" rel="noopener noreferrer">
                评估大型语言模型在罗马尼亚文本变音符号恢复中的表现：一项比较研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Evaluating Large Language Models for Diacritic Restoration in Romanian Texts: A Comparative Study
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mihai Dan Nadas, Laura Diosan
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于罗马尼亚语文本的变音符号恢复，这是一个特定语言的NLP任务，与推荐系统、搜索或广告领域没有直接关联。论文内容属于纯粹的NLP应用范畴，不涉及任何推荐、搜索或广告相关的技术或应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:43:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13182v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13182v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Automatic diacritic restoration is crucial for text processing in languages with rich diacritical marks, such as Romanian. This study evaluates the performance of several large language models (LLMs) in restoring diacritics in Romanian texts. Using a comprehensive corpus, we tested models including OpenAI's GPT-3.5, GPT-4, GPT-4o, Google's Gemini 1.0 Pro, Meta's Llama 2 and Llama 3, MistralAI's Mixtral 8x7B Instruct, airoboros 70B, and OpenLLM-Ro's RoLlama 2 7B, under multiple prompt templates ranging from zero-shot to complex multi-shot instructions. Results show that models such as GPT-4o achieve high diacritic restoration accuracy, consistently surpassing a neutral echo baseline, while others, including Meta's Llama family, exhibit wider variability. These findings highlight the impact of model architecture, training data, and prompt design on diacritic restoration performance and outline promising directions for improving NLP tools for diacritic-rich languages.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13180v1" target="_blank" rel="noopener noreferrer">
                翻译熵：一种用于评估翻译系统的统计框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Translation Entropy: A Statistical Framework for Evaluating Translation Systems
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ronit D. Gross, Yanir Harel, Ido Kanter
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器翻译系统的评估框架，属于纯粹的NLP评估领域。虽然涉及统计方法，但没有任何与推荐系统、搜索或广告相关的潜在应用，完全属于被排除的'纯粹NLP-centric topics'范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:42:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13180v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13180v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The translation of written language has been known since the 3rd century BC; however, its necessity has become increasingly common in the information age. Today, many translators exist, based on encoder-decoder deep architectures, nevertheless, no quantitative objective methods are available to assess their performance, likely because the entropy of even a single language remains unknown. This study presents a quantitative method for estimating translation entropy, with the following key finding. Given a translator, several sentences that differ by only one selected token of a given pivot sentence yield identical translations. Analyzing the statistics of this phenomenon across an ensemble of such sentences, consisting each of a pivot selected token, yields the probabilities of replacing this specific token with others while preserving the translation. These probabilities constitute the entropy of the selected token, and the average across all selected pivot tokens provides an estimate of the translator's overall translation entropy, which is enhanced along the decoder blocks. This entropic measure allows for the quantitative ranking of several publicly available translators and reveals whether mutual translation entropy is symmetric. Extending the proposed method to include the replacement of two tokens in a given pivot sentence demonstrates a multiplicative effect, where translation degeneracy is proportional to the product of the degeneracies of the two tokens. These findings establish translation entropy as a measurable property and objective benchmarking of artificial translators. Results are based on MarianMT, T5-Base and NLLB-200 translators.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13169v1" target="_blank" rel="noopener noreferrer">
                TCM-5CEval：针对大型语言模型在中医领域综合临床研究能力的扩展深度评估基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TCM-5CEval: Extended Deep Evaluation Benchmark for LLM's Comprehensive Clinical Research Competence in Traditional Chinese Medicine
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tianai Huang, Jiayuan Chen, Lu Lu, Pengcheng Chen, Tianbin Li, Bing Han, Wenchao...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于中医领域的临床研究评估基准，这属于医学/生物学特定领域应用，明确属于无关主题。该基准与推荐系统、搜索或广告的核心进展、使能技术或直接应用没有任何关联，完全超出了当前关注范围。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:15:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13169v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13169v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models (LLMs) have demonstrated exceptional capabilities in general domains, yet their application in highly specialized and culturally-rich fields like Traditional Chinese Medicine (TCM) requires rigorous and nuanced evaluation. Building upon prior foundational work such as TCM-3CEval, which highlighted systemic knowledge gaps and the importance of cultural-contextual alignment, we introduce TCM-5CEval, a more granular and comprehensive benchmark. TCM-5CEval is designed to assess LLMs across five critical dimensions: (1) Core Knowledge (TCM-Exam), (2) Classical Literacy (TCM-LitQA), (3) Clinical Decision-making (TCM-MRCD), (4) Chinese Materia Medica (TCM-CMM), and (5) Clinical Non-pharmacological Therapy (TCM-ClinNPT). We conducted a thorough evaluation of fifteen prominent LLMs, revealing significant performance disparities and identifying top-performing models like deepseek\_r1 and gemini\_2\_5\_pro. Our findings show that while models exhibit proficiency in recalling foundational knowledge, they struggle with the interpretative complexities of classical texts. Critically, permutation-based consistency testing reveals widespread fragilities in model inference. All evaluated models, including the highest-scoring ones, displayed a substantial performance degradation when faced with varied question option ordering, indicating a pervasive sensitivity to positional bias and a lack of robust understanding. TCM-5CEval not only provides a more detailed diagnostic tool for LLM capabilities in TCM but aldso exposes fundamental weaknesses in their reasoning stability. To promote further research and standardized comparison, TCM-5CEval has been uploaded to the Medbench platform, joining its predecessor in the "In-depth Challenge for Comprehensive TCM Abilities" special track.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13159v1" target="_blank" rel="noopener noreferrer">
                孟加拉语ASR转录中区分重复性不流利与形态重叠：新型语料库与基准分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Distinguishing Repetition Disfluency from Morphological Reduplication in Bangla ASR Transcripts: A Novel Corpus and Benchmarking Analysis
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zaara Zabeen Arpa, Sadnam Sakib Apurbo, Nazia Karim Khan Oishee, Ajwad Abrar
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语音识别转录中的语言学问题（重复性不流利与形态重叠的区分），属于特定语言处理任务。这与推荐系统、搜索或广告的核心技术领域无直接关联，且不涉及LLM、Transformer架构进展或异构数据统一建模等当前关注方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:06:01
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13159v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13159v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Automatic Speech Recognition (ASR) transcripts, especially in low-resource languages like Bangla, contain a critical ambiguity: word-word repetitions can be either Repetition Disfluency (unintentional ASR error/hesitation) or Morphological Reduplication (a deliberate grammatical construct). Standard disfluency correction fails by erroneously deleting valid linguistic information. To solve this, we introduce the first publicly available, 20,000-row Bangla corpus, manually annotated to explicitly distinguish between these two phenomena in noisy ASR transcripts. We benchmark this novel resource using two paradigms: state-of-the-art multilingual Large Language Models (LLMs) and task-specific fine-tuning of encoder models. LLMs achieve competitive performance (up to 82.68\% accuracy) with few-shot prompting. However, fine-tuning proves superior, with the language-specific BanglaBERT model achieving the highest accuracy of 84.78\% and an F1 score of 0.677. This establishes a strong, linguistically-informed baseline and provides essential data for developing sophisticated, semantic-preserving text normalization systems for Bangla.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13107v1" target="_blank" rel="noopener noreferrer">
                评估大型语言模型识别随机对照试验中CONSORT报告指南依从性的能力：一项方法学评估研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Evaluating the Ability of Large Language Models to Identify Adherence to CONSORT Reporting Guidelines in Randomized Controlled Trials: A Methodological Evaluation Study
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhichao He, Mouxiao Bian, Jianhong Zhu, Jiayuan Chen, Yunqiu Wang, Wenxia Zhao, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的随机对照试验报告指南评估，属于医疗领域特定应用。虽然涉及LLM评估，但应用场景与推荐系统、搜索或广告完全无关，且属于被明确排除的医学应用范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:05:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13107v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13107v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The Consolidated Standards of Reporting Trials statement is the global benchmark for transparent and high-quality reporting of randomized controlled trials. Manual verification of CONSORT adherence is a laborious, time-intensive process that constitutes a significant bottleneck in peer review and evidence synthesis. This study aimed to systematically evaluate the accuracy and reliability of contemporary LLMs in identifying the adherence of published RCTs to the CONSORT 2010 statement under a zero-shot setting. We constructed a golden standard dataset of 150 published RCTs spanning diverse medical specialties. The primary outcome was the macro-averaged F1-score for the three-class classification task, supplemented by item-wise performance metrics and qualitative error analysis. Overall model performance was modest. The top-performing models, Gemini-2.5-Flash and DeepSeek-R1, achieved nearly identical macro F1 scores of 0.634 and Cohen's Kappa coefficients of 0.280 and 0.282, respectively, indicating only fair agreement with expert consensus. A striking performance disparity was observed across classes: while most models could identify compliant items with high accuracy (F1 score > 0.850), they struggled profoundly with identifying non-compliant and not applicable items, where F1 scores rarely exceeded 0.400. Notably, some high-profile models like GPT-4o underperformed, achieving a macro F1-score of only 0.521. LLMs show potential as preliminary screening assistants for CONSORT checks, capably identifying well-reported items. However, their current inability to reliably detect reporting omissions or methodological flaws makes them unsuitable for replacing human expertise in the critical appraisal of trial quality.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13095v1" target="_blank" rel="noopener noreferrer">
                BeDiscovER：推理语言模型时代的语篇理解基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            BeDiscovER: The Benchmark of Discourse Understanding in the Era of Reasoning Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chuyuan Li, Giuseppe Carenini
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语篇理解基准测试和推理语言模型的评估，属于纯粹的NLP评估基准范畴。虽然涉及语言模型，但论文内容聚焦于语篇理解这一特定NLP任务，与推荐系统、搜索或广告的核心技术进展没有直接关联，也不涉及Transformer架构改进或异构数据统一建模等关键技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:50:12
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13095v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13095v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce BeDiscovER (Benchmark of Discourse Understanding in the Era of Reasoning Language Models), an up-to-date, comprehensive suite for evaluating the discourse-level knowledge of modern LLMs. BeDiscovER compiles 5 publicly available discourse tasks across discourse lexicon, (multi-)sentential, and documental levels, with in total 52 individual datasets. It covers both extensively studied tasks such as discourse parsing and temporal relation extraction, as well as some novel challenges such as discourse particle disambiguation (e.g., ``just''), and also aggregates a shared task on Discourse Relation Parsing and Treebanking for multilingual and multi-framework discourse relation classification. We evaluate open-source LLMs: Qwen3 series, DeepSeek-R1, and frontier model such as GPT-5-mini on BeDiscovER, and find that state-of-the-art models exhibit strong performance in arithmetic aspect of temporal reasoning, but they struggle with full document reasoning and some subtle semantic and discourse phenomena, such as rhetorical relation recognition.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13043v1" target="_blank" rel="noopener noreferrer">
                Spark-Prover-X1：通过多样化数据训练进行形式化定理证明
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Spark-Prover-X1: Formal Theorem Proving Through Diverse Data Training
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xinyuan Zhou, Yi Lei, Xiaoyu Zhou, Jingyi Sun, Yu Zhu, Zhongyi Ye, Weitai Zhang,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于形式化定理证明领域，这是一个与数理逻辑和自动推理相关的专业领域。论文标题表明其核心是定理证明系统的技术改进，与推荐系统、搜索、广告或Transformer架构没有任何直接或间接的联系。形式化定理证明在RecSys/Search/Ads领域没有明显的应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 06:44:02
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13043v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13043v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models (LLMs) have shown significant promise in automated theorem proving, yet progress is often constrained by the scarcity of diverse and high-quality formal language data. To address this issue, we introduce Spark-Prover-X1, a 7B parameter model trained via an three-stage framework designed to unlock the reasoning potential of more accessible and moderately-sized LLMs. The first stage infuses deep knowledge through continuous pre-training on a broad mathematical corpus, enhanced by a suite of novel data tasks. Key innovation is a "CoT-augmented state prediction" task to achieve fine-grained reasoning. The second stage employs Supervised Fine-tuning (SFT) within an expert iteration loop to specialize both the Spark-Prover-X1-7B and Spark-Formalizer-X1-7B models. Finally, a targeted round of Group Relative Policy Optimization (GRPO) is applied to sharpen the prover's capabilities on the most challenging problems. To facilitate robust evaluation, particularly on problems from real-world examinations, we also introduce ExamFormal-Bench, a new benchmark dataset of 402 formal problems. Experimental results demonstrate that Spark-Prover-X1-7B achieves state-of-the-art performance among similarly-sized open-source models, attaining a 37.0\% average pass rate (pass@32). It shows exceptional performance on difficult competition benchmarks, notably solving 27 problems on PutnamBench (pass@32) and achieving 24.0\% on CombiBench (pass@32). Our work validates that this diverse training data and progressively refined training pipeline provides an effective path for enhancing the formal reasoning capabilities of lightweight LLMs. Both Spark-Prover-X1-7B and Spark-Formalizer-X1-7B, along with the ExamFormal-Bench dataset, are made publicly available at:https://www.modelscope.cn/organization/iflytek, https://gitcode.com/ifly_opensource.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.12851v1" target="_blank" rel="noopener noreferrer">
                NeuroLex：一种用于脑电图报告理解与生成的轻量级领域语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            NeuroLex: A Lightweight Domain Language Model for EEG Report Understanding and Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kang Yin, Hye-Bin Shin
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域（脑电图报告）的特定应用，属于明确的无关主题范畴。虽然涉及语言模型技术，但其应用场景与搜索、推荐或广告系统完全无关，且没有表明对推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 00:44:35
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.12851v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.12851v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Clinical electroencephalogram (EEG) reports encode domain-specific linguistic conventions that general-purpose language models (LMs) fail to capture. We introduce NeuroLex, a lightweight domain-adaptive language model trained purely on EEG report text from the Harvard Electroencephalography Database. Unlike existing biomedical LMs, NeuroLex is tailored to the linguistic and diagnostic characteristics of EEG reporting, enabling it to serve as both an independent textual model and a decoder backbone for multimodal EEG-language systems. Using span-corruption pretraining and instruction-style fine-tuning on report polishing, paragraph summarization, and terminology question answering, NeuroLex learns the syntax and reasoning patterns characteristic of EEG interpretation. Comprehensive evaluations show that it achieves lower perplexity, higher extraction and summarization accuracy, better label efficiency, and improved robustness to negation and factual hallucination compared with general models of the same scale. With an EEG-aware linguistic backbone, NeuroLex bridges biomedical text modeling and brain-computer interface applications, offering a foundation for interpretable and language-driven neural decoding.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13713v1" target="_blank" rel="noopener noreferrer">
                自由形式场景编辑器：实现类似3D引擎中的多轮对象操作
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Free-Form Scene Editor: Enabling Multi-Round Object Manipulation like in a 3D Engine
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xincheng Shuai, Zhenyuan Qin, Henghui Ding, Dacheng Tao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D场景编辑和对象操作技术，属于纯粹的3D视觉和图形学领域。虽然提到了类似3D引擎的交互能力，但这与推荐系统、搜索或广告的核心技术没有直接关联，也不涉及LLM、Transformer架构或异构数据处理等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:57:39
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13713v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13713v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advances in text-to-image (T2I) diffusion models have significantly improved semantic image editing, yet most methods fall short in performing 3D-aware object manipulation. In this work, we present FFSE, a 3D-aware autoregressive framework designed to enable intuitive, physically-consistent object editing directly on real-world images. Unlike previous approaches that either operate in image space or require slow and error-prone 3D reconstruction, FFSE models editing as a sequence of learned 3D transformations, allowing users to perform arbitrary manipulations, such as translation, scaling, and rotation, while preserving realistic background effects (e.g., shadows, reflections) and maintaining global scene consistency across multiple editing rounds. To support learning of multi-round 3D-aware object manipulation, we introduce 3DObjectEditor, a hybrid dataset constructed from simulated editing sequences across diverse objects and scenes, enabling effective training under multi-round and dynamic conditions. Extensive experiments show that the proposed FFSE significantly outperforms existing methods in both single-round and multi-round 3D-aware editing scenarios.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13704v1" target="_blank" rel="noopener noreferrer">
                TiViBench：面向视频生成模型的视频思维推理基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TiViBench: Benchmarking Think-in-Video Reasoning for Video Generative Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Harold Haodong Chen, Disen Lan, Wen-Jie Shu, Qingyang Liu, Zihan Wang, Sirui Che...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成模型的基准测试和推理能力评估，属于纯粹的视觉生成领域。虽然标题提到'推理'，但这是针对视频内容生成的推理，与推荐系统、搜索或广告的核心技术没有直接关联。该工作属于纯粹的视觉生成和评估范畴，不符合当前关注的任何技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:52:44
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13704v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13704v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The rapid evolution of video generative models has shifted their focus from producing visually plausible outputs to tackling tasks requiring physical plausibility and logical consistency. However, despite recent breakthroughs such as Veo 3's chain-of-frames reasoning, it remains unclear whether these models can exhibit reasoning capabilities similar to large language models (LLMs). Existing benchmarks predominantly evaluate visual fidelity and temporal coherence, failing to capture higher-order reasoning abilities. To bridge this gap, we propose TiViBench, a hierarchical benchmark specifically designed to evaluate the reasoning capabilities of image-to-video (I2V) generation models. TiViBench systematically assesses reasoning across four dimensions: i) Structural Reasoning & Search, ii) Spatial & Visual Pattern Reasoning, iii) Symbolic & Logical Reasoning, and iv) Action Planning & Task Execution, spanning 24 diverse task scenarios across 3 difficulty levels. Through extensive evaluations, we show that commercial models (e.g., Sora 2, Veo 3.1) demonstrate stronger reasoning potential, while open-source models reveal untapped potential that remains hindered by limited training scale and data diversity. To further unlock this potential, we introduce VideoTPO, a simple yet effective test-time strategy inspired by preference optimization. By performing LLM self-analysis on generated candidates to identify strengths and weaknesses, VideoTPO significantly enhances reasoning performance without requiring additional training, data, or reward models. Together, TiViBench and VideoTPO pave the way for evaluating and advancing reasoning in video generation models, setting a foundation for future research in this emerging field.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13684v1" target="_blank" rel="noopener noreferrer">
                无需训练的多视图扩展IC-Light：面向文本位置感知的场景重光照
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Training-Free Multi-View Extension of IC-Light for Textual Position-Aware Scene Relighting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiangnan Ye, Jiedong Zhuang, Lianrui Mu, Wenjie Zheng, Jiaqi Hu, Xingze Zou, Jin...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的场景重光照和多视图扩展技术，属于纯粹的视觉处理范畴。虽然提及文本位置感知，但这主要用于视觉场景理解而非推荐系统、搜索或广告应用。该技术缺乏与异构数据建模、Transformer架构改进或LLM在推荐/搜索领域应用的直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:37:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13684v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13684v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce GS-Light, an efficient, textual position-aware pipeline for text-guided relighting of 3D scenes represented via Gaussian Splatting (3DGS). GS-Light implements a training-free extension of a single-input diffusion model to handle multi-view inputs. Given a user prompt that may specify lighting direction, color, intensity, or reference objects, we employ a large vision-language model (LVLM) to parse the prompt into lighting priors. Using off-the-shelf estimators for geometry and semantics (depth, surface normals, and semantic segmentation), we fuse these lighting priors with view-geometry constraints to compute illumination maps and generate initial latent codes for each view. These meticulously derived init latents guide the diffusion model to generate relighting outputs that more accurately reflect user expectations, especially in terms of lighting direction. By feeding multi-view rendered images, along with the init latents, into our multi-view relighting model, we produce high-fidelity, artistically relit images. Finally, we fine-tune the 3DGS scene with the relit appearance to obtain a fully relit 3D scene. We evaluate GS-Light on both indoor and outdoor scenes, comparing it to state-of-the-art baselines including per-view relighting, video relighting, and scene editing methods. Using quantitative metrics (multi-view consistency, imaging quality, aesthetic score, semantic similarity, etc.) and qualitative assessment (user studies), GS-Light demonstrates consistent improvements over baselines. Code and assets will be made available upon publication.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13655v1" target="_blank" rel="noopener noreferrer">
                OlmoEarth：用于多模态地球观测的稳定潜在图像建模
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Henry Herzog, Favyen Bastani, Yawen Zhang, Gabriel Tseng, Joseph Redmon, Hadrien...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于地球观测的多模态图像建模，属于遥感领域的特定应用，与推荐系统、搜索或广告的核心技术焦点无关。论文标题表明这是纯粹的视觉/遥感研究，没有显示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:06:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13655v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13655v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Earth observation data presents a unique challenge: it is spatial like images, sequential like video or text, and highly multimodal. We present OlmoEarth: a multimodal, spatio-temporal foundation model that employs a novel self-supervised learning formulation, masking strategy, and loss all designed for the Earth observation domain. OlmoEarth achieves state-of-the-art performance compared to 12 other foundation models across a variety of research benchmarks and real-world tasks from external partners. When evaluating embeddings OlmoEarth achieves the best performance on 15 out of 24 tasks, and with full fine-tuning it is the best on 19 of 29 tasks. We deploy OlmoEarth as the backbone of an end-to-end platform for data collection, labeling, training, and inference of Earth observation models. The OlmoEarth Platform puts frontier foundation models and powerful data management tools into the hands of non-profits and NGOs working to solve the world's biggest problems. OlmoEarth source code, training data, and pre-trained weights are available at $\href{https://github.com/allenai/olmoearth_pretrain}{\text{https://github.com/allenai/olmoearth_pretrain}}$.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13654v1" target="_blank" rel="noopener noreferrer">
                针对双重对手的调优：通过超参数调优增强对抗迁移攻击和查询攻击的鲁棒性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Tuning for Two Adversaries: Enhancing the Robustness Against Transfer and Query-Based Attacks using Hyperparameter Tuning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Pascal Zimmer, Ghassan Karame
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于对抗性攻击的鲁棒性，这属于安全领域，明确列在无关主题中。虽然论文涉及超参数调优，但其核心关注点是防御机制而非推荐系统、搜索或广告的核心技术。没有明显的应用场景与我的关注领域相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 18:03:49
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13654v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13654v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CR</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In this paper, we present the first detailed analysis of how optimization hyperparameters -- such as learning rate, weight decay, momentum, and batch size -- influence robustness against both transfer-based and query-based attacks. Supported by theory and experiments, our study spans a variety of practical deployment settings, including centralized training, ensemble learning, and distributed training. We uncover a striking dichotomy: for transfer-based attacks, decreasing the learning rate significantly enhances robustness by up to $64\%$. In contrast, for query-based attacks, increasing the learning rate consistently leads to improved robustness by up to $28\%$ across various settings and data distributions. Leveraging these findings, we explore -- for the first time -- the optimization hyperparameter design space to jointly enhance robustness against both transfer-based and query-based attacks. Our results reveal that distributed models benefit the most from hyperparameter tuning, achieving a remarkable tradeoff by simultaneously mitigating both attack types more effectively than other training setups.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13648v1" target="_blank" rel="noopener noreferrer">
                PhysX-Anything：从单张图像生成仿真就绪的物理三维资产
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ziang Cao, Fangzhou Hong, Zhaoxi Chen, Liang Pan, Ziwei Liu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉和物理仿真领域，涉及从单张图像生成物理三维资产。这与推荐系统、搜索或广告的核心技术焦点完全无关，也不涉及LLM、Transformer架构或异构数据建模。该技术主要面向图形学和仿真应用，没有明显的RecSys/Search/Ads应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:59:53
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13648v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13648v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.RO</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D modeling is shifting from static visual representations toward physical, articulated assets that can be directly used in simulation and interaction. However, most existing 3D generation methods overlook key physical and articulation properties, thereby limiting their utility in embodied AI. To bridge this gap, we introduce PhysX-Anything, the first simulation-ready physical 3D generative framework that, given a single in-the-wild image, produces high-quality sim-ready 3D assets with explicit geometry, articulation, and physical attributes. Specifically, we propose the first VLM-based physical 3D generative model, along with a new 3D representation that efficiently tokenizes geometry. It reduces the number of tokens by 193x, enabling explicit geometry learning within standard VLM token budgets without introducing any special tokens during fine-tuning and significantly improving generative quality. In addition, to overcome the limited diversity of existing physical 3D datasets, we construct a new dataset, PhysX-Mobility, which expands the object categories in prior physical 3D datasets by over 2x and includes more than 2K common real-world objects with rich physical annotations. Extensive experiments on PhysX-Mobility and in-the-wild images demonstrate that PhysX-Anything delivers strong generative performance and robust generalization. Furthermore, simulation-based experiments in a MuJoCo-style environment validate that our sim-ready assets can be directly used for contact-rich robotic policy learning. We believe PhysX-Anything can substantially empower a broad range of downstream applications, especially in embodied AI and physics-based simulation.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13621v1" target="_blank" rel="noopener noreferrer">
                用于生物特征验证的Alpha散度损失
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Alpha Divergence Losses for Biometric Verification
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Dimitrios Koutsianos, Ladislav Mosner, Yannis Panagakis, Themos Stafylakis
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注生物特征验证中的损失函数设计，属于计算机视觉中的特定应用领域。生物特征验证与推荐系统、搜索或广告的核心技术没有直接关联，也不涉及LLM、Transformer架构或异构数据建模等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:27:28
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13621v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13621v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Performance in face and speaker verification is largely driven by margin based softmax losses like CosFace and ArcFace. Recently introduced $α$-divergence loss functions offer a compelling alternative, particularly for their ability to induce sparse solutions (when $α>1$). However, integrating an angular margin-crucial for verification tasks-is not straightforward. We find this integration can be achieved in at least two distinct ways: via the reference measure (prior probabilities) or via the logits (unnormalized log-likelihoods). In this paper, we explore both pathways, deriving two novel margin-based $α$-divergence losses: Q-Margin (margin in the reference measure) and A3M (margin in the logits). We identify and address a critical training instability in A3M-caused by the interplay of penalized logits and sparsity-with a simple yet effective prototype re-initialization strategy. Our methods achieve significant performance gains on the challenging IJB-B and IJB-C face verification benchmarks. We demonstrate similarly strong performance in speaker verification on VoxCeleb. Crucially, our models significantly outperform strong baselines at low false acceptance rates (FAR). This capability is crucial for practical high-security applications, such as banking authentication, when minimizing false authentications is paramount.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13618v1" target="_blank" rel="noopener noreferrer">
                基于MediaPipe和眼部纵横比的实时驾驶员疲劳检测系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Real-Time Driver Drowsiness Detection System Using MediaPipe and Eye Aspect Ratio
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ashlesha G. Sawant, Shreyash S. Kamble, Raj S. Kanade, Raunak N. Kanugo, Tanishq...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉在驾驶员疲劳检测领域的应用，属于纯视觉技术范畴。虽然涉及实时系统，但没有任何与推荐系统、搜索或广告相关的技术内容或潜在应用场景，完全超出了当前关注的技术领域范围。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:22:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13618v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13618v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    One of the major causes of road accidents is driver fatigue that causes thousands of fatalities and injuries every year. This study shows development of a Driver Drowsiness Detection System meant to improve the safety of the road by alerting drivers who are showing signs of being drowsy. The system is based on a standard webcam that tracks the facial features of the driver with the main emphasis on the examination of eye movements that can be conducted with the help of the Eye Aspect Ratio (EAR) method. The Face Mesh by MediaPipe is a lightweight framework that can identify facial landmarks with high accuracy and efficiency, which is considered to be important in real time use. The system detects the moments of long eye shutdowns or a very low rate of blinking which are manifestations of drowsiness and alerts the driver through sound to get her attention back. This system achieves a high-performance and low-cost driver monitoring solution with the help of the computational power of OpenCV to process the image and the MediaPipe to identify faces. Test data experimental analyses indicate that the system is very accurate and responds quicker; this confirms that it can be a component of the current Advanced Driving Assistance System (ADAS).
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13615v1" target="_blank" rel="noopener noreferrer">
                面向组织病理学图像的组织感知细胞核检测与分类模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Tissue Aware Nuclei Detection and Classification Model for Histopathology Images
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kesi Xu, Eleni Chiou, Ali Varamesh, Laura Acqualagna, Nasir Rajpoot
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像分析中的细胞核检测和分类，属于生物医学领域应用。这与我的关注点完全无关，因为我的关注领域仅限于搜索、推荐和广告系统中的技术进展，特别是与LLM、Transformer架构和异构数据处理相关的技术。该论文没有展示任何在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:21:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13615v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13615v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Accurate nuclei detection and classification are fundamental to computational pathology, yet existing approaches are hindered by reliance on detailed expert annotations and insufficient use of tissue context. We present Tissue-Aware Nuclei Detection (TAND), a novel framework achieving joint nuclei detection and classification using point-level supervision enhanced by tissue mask conditioning. TAND couples a ConvNeXt-based encoder-decoder with a frozen Virchow-2 tissue segmentation branch, where semantic tissue probabilities selectively modulate the classification stream through a novel multi-scale Spatial Feature-wise Linear Modulation (Spatial-FiLM). On the PUMA benchmark, TAND achieves state-of-the-art performance, surpassing both tissue-agnostic baselines and mask-supervised methods. Notably, our approach demonstrates remarkable improvements in tissue-dependent cell types such as epithelium, endothelium, and stroma. To the best of our knowledge, this is the first method to condition per-cell classification on learned tissue masks, offering a practical pathway to reduce annotation burden.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13609v1" target="_blank" rel="noopener noreferrer">
                AtlasMorph：学习用于脑部MRI的条件可变形模板
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AtlasMorph: Learning conditional deformable templates for brain MRI
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Marianne Rakic, Andrew Hoopes, S. Mazdak Abulnaga, Mert R. Sabuncu, John V. Gutt...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像处理中的脑部MRI变形模板学习，属于生物医学领域的特定应用。这与搜索、推荐、广告系统或相关使能技术没有任何关联。论文内容完全落在被排除的医学/生物学特定应用范畴内，没有任何潜在的应用价值到当前关注的领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:13:58
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13609v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13609v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Deformable templates, or atlases, are images that represent a prototypical anatomy for a population, and are often enhanced with probabilistic anatomical label maps. They are commonly used in medical image analysis for population studies and computational anatomy tasks such as registration and segmentation. Because developing a template is a computationally expensive process, relatively few templates are available. As a result, analysis is often conducted with sub-optimal templates that are not truly representative of the study population, especially when there are large variations within this population. We propose a machine learning framework that uses convolutional registration neural networks to efficiently learn a function that outputs templates conditioned on subject-specific attributes, such as age and sex. We also leverage segmentations, when available, to produce anatomical segmentation maps for the resulting templates. The learned network can also be used to register subject images to the templates. We demonstrate our method on a compilation of 3D brain MRI datasets, and show that it can learn high-quality templates that are representative of populations. We find that annotated conditional templates enable better registration than their unlabeled unconditional counterparts, and outperform other templates construction methods.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13607v1" target="_blank" rel="noopener noreferrer">
                ICLR：用于低光照图像增强中自然色彩恢复的色度间与亮度交互
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ICLR: Inter-Chrominance and Luminance Interaction for Natural Color Restoration in Low-Light Image Enhancement
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xin Xu, Hao Liu, Wei Liu, Wei Wang, Jiayi Wu, Kui Jiang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的低光照图像增强技术，特别是色彩恢复问题。虽然标题提到"自然色彩恢复"，但这纯粹是视觉图像处理任务，与推荐系统、搜索或广告中的排序、用户建模、内容理解等核心问题没有任何关联。该技术没有明显的应用场景可以转化到RecSys/Search/Ads领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 17:12:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13607v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13607v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Low-Light Image Enhancement (LLIE) task aims at improving contrast while restoring details and textures for images captured in low-light conditions. HVI color space has made significant progress in this task by enabling precise decoupling of chrominance and luminance. However, for the interaction of chrominance and luminance branches, substantial distributional differences between the two branches prevalent in natural images limit complementary feature extraction, and luminance errors are propagated to chrominance channels through the nonlinear parameter. Furthermore, for interaction between different chrominance branches, images with large homogeneous-color regions usually exhibit weak correlation between chrominance branches due to concentrated distributions. Traditional pixel-wise losses exploit strong inter-branch correlations for co-optimization, causing gradient conflicts in weakly correlated regions. Therefore, we propose an Inter-Chrominance and Luminance Interaction (ICLR) framework including a Dual-stream Interaction Enhancement Module (DIEM) and a Covariance Correction Loss (CCL). The DIEM improves the extraction of complementary information from two dimensions, fusion and enhancement, respectively. The CCL utilizes luminance residual statistics to penalize chrominance errors and balances gradient conflicts by constraining chrominance branches covariance. Experimental results on multiple datasets show that the proposed ICLR framework outperforms state-of-the-art methods.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13586v1" target="_blank" rel="noopener noreferrer">
                自适应多尺度集成解锁组织病理学图像中稳健的细胞注释
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Adaptive Multi-Scale Integration Unlocks Robust Cell Annotation in Histopathology Images
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yinuo Xu, Yan Cui, Mingyao Li, Zhi Huang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像分析中的细胞注释，属于医疗/生物领域的特定应用。组织病理学图像分析与推荐系统、搜索或广告领域没有直接关联，也不涉及LLM技术、Transformer架构或异构数据建模在商业应用中的潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:49:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13586v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13586v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Identifying cell types and subtypes from routine histopathology images is essential for improving the computational understanding of human disease. Existing tile-based models can capture detailed nuclear morphology but often fail to incorporate the broader tissue context that influences a cell's function and identity. In addition, available human annotations are typically coarse-grained and unevenly distributed across studies, making fine-grained subtype-level supervision difficult to obtain. To address these limitations, we introduce NuClass, a pathologist workflow inspired framework for cell-wise multi-scale integration of nuclear morphology and microenvironmental context. NuClass includes two main components: Path local, which focuses on nuclear morphology from 224-by-224 pixel crops, and Path global, which models the surrounding 1024-by-1024 pixel neighborhood. A learnable gating module adaptively balances local detail and contextual cues. To encourage complementary learning, we incorporate an uncertainty-guided objective that directs the global path to prioritize regions where the local path is uncertain. We also provide calibrated confidence estimates and Grad-CAM visualizations to enhance interpretability. To overcome the lack of high-quality annotations, we construct a marker-guided dataset from Xenium spatial transcriptomics assays, yielding single-cell resolution labels for more than two million cells across eight organs and 16 classes. Evaluated on three fully held-out cohorts, NuClass achieves up to 96 percent F1 for its best-performing class, outperforming strong baselines. Our results show that multi-scale, uncertainty-aware fusion can bridge the gap between slide-level pathological foundation models and reliable, cell-level phenotype prediction.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13571v1" target="_blank" rel="noopener noreferrer">
                Opt3DGS：通过自适应探索和曲率感知利用优化3D高斯泼溅
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Opt3DGS: Optimizing 3D Gaussian Splatting with Adaptive Exploration and Curvature-Aware Exploitation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ziyang Huang, Jiagang Chen, Jin Liu, Shunping Ji
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D高斯泼溅优化技术，属于纯粹的3D视觉领域，与推荐系统、搜索或广告的核心技术栈没有直接关联。3D高斯泼溅主要用于3D场景重建和渲染，在RecSys/Search/Ads领域缺乏明确的应用场景和技术迁移路径。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:37:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13571v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13571v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D Gaussian Splatting (3DGS) has emerged as a leading framework for novel view synthesis, yet its core optimization challenges remain underexplored. We identify two key issues in 3DGS optimization: entrapment in suboptimal local optima and insufficient convergence quality. To address these, we propose Opt3DGS, a robust framework that enhances 3DGS through a two-stage optimization process of adaptive exploration and curvature-guided exploitation. In the exploration phase, an Adaptive Weighted Stochastic Gradient Langevin Dynamics (SGLD) method enhances global search to escape local optima. In the exploitation phase, a Local Quasi-Newton Direction-guided Adam optimizer leverages curvature information for precise and efficient convergence. Extensive experiments on diverse benchmark datasets demonstrate that Opt3DGS achieves state-of-the-art rendering quality by refining the 3DGS optimization process without modifying its underlying representation.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13552v1" target="_blank" rel="noopener noreferrer">
                TSE-Net：基于单张遥感图像进行半监督单目高度估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TSE-Net: Semi-supervised Monocular Height Estimation from Single Remote Sensing Images
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sining Chen, Xiao Xiang Zhu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于遥感图像的高度估计，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术无关。论文内容涉及单目视觉和半监督学习，但没有展示任何在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:22:38
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13552v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13552v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Monocular height estimation plays a critical role in 3D perception for remote sensing, offering a cost-effective alternative to multi-view or LiDAR-based methods. While deep learning has significantly advanced the capabilities of monocular height estimation, these methods remain fundamentally limited by the availability of labeled data, which are expensive and labor-intensive to obtain at scale. The scarcity of high-quality annotations hinders the generalization and performance of existing models. To overcome this limitation, we propose leveraging large volumes of unlabeled data through a semi-supervised learning framework, enabling the model to extract informative cues from unlabeled samples and improve its predictive performance. In this work, we introduce TSE-Net, a self-training pipeline for semi-supervised monocular height estimation. The pipeline integrates teacher, student, and exam networks. The student network is trained on unlabeled data using pseudo-labels generated by the teacher network, while the exam network functions as a temporal ensemble of the student network to stabilize performance. The teacher network is formulated as a joint regression and classification model: the regression branch predicts height values that serve as pseudo-labels, and the classification branch predicts height value classes along with class probabilities, which are used to filter pseudo-labels. Height value classes are defined using a hierarchical bi-cut strategy to address the inherent long-tailed distribution of heights, and the predicted class probabilities are calibrated with a Plackett-Luce model to reflect the expected accuracy of pseudo-labels. We evaluate the proposed pipeline on three datasets spanning different resolutions and imaging modalities. Codes are available at https://github.com/zhu-xlab/tse-net.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13535v1" target="_blank" rel="noopener noreferrer">
                准确率不足够：通过颜色偏移在联邦学习中投毒可解释性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Accuracy is Not Enough: Poisoning Interpretability in Federated Learning via Color Skew
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Farhin Farhad Riya, Shahinul Hoque, Jinyuan Stella Sun, Olivera Kotevska
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文明确涉及联邦学习（被列为不相关主题）和可解释性投毒攻击，这些主题与推荐系统、搜索或广告的核心技术进展无关。论文关注安全性和隐私方面的攻击，而非排名算法、Transformer架构改进或LLM在推荐/搜索中的应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 16:07:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13535v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13535v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    As machine learning models are increasingly deployed in safety-critical domains, visual explanation techniques have become essential tools for supporting transparency. In this work, we reveal a new class of attacks that compromise model interpretability without affecting accuracy. Specifically, we show that small color perturbations applied by adversarial clients in a federated learning setting can shift a model's saliency maps away from semantically meaningful regions while keeping the prediction unchanged. The proposed saliency-aware attack framework, called Chromatic Perturbation Module, systematically crafts adversarial examples by altering the color contrast between foreground and background in a way that disrupts explanation fidelity. These perturbations accumulate across training rounds, poisoning the global model's internal feature attributions in a stealthy and persistent manner. Our findings challenge a common assumption in model auditing that correct predictions imply faithful explanations and demonstrate that interpretability itself can be an attack surface. We evaluate this vulnerability across multiple datasets and show that standard training pipelines are insufficient to detect or mitigate explanation degradation, especially in the federated learning setting, where subtle color perturbations are harder to discern. Our attack reduces peak activation overlap in Grad-CAM explanations by up to 35% while preserving classification accuracy above 96% on all evaluated datasets.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13507v1" target="_blank" rel="noopener noreferrer">
                中国城中村消失与转型的测绘研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Mapping the Vanishing and Transformation of Urban Villages in China
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Wenyu Zhang, Yao Tong, Yiqiu Liu, Rui Cao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于城市地理学和社会学领域的城中村演变研究，与推荐系统、搜索、广告或相关使能技术完全无关。论文内容涉及城市规划、土地利用和社会变迁，没有任何技术组件或潜在应用能够关联到我的关注领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:42:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13507v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13507v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Urban villages (UVs), informal settlements embedded within China's urban fabric, have undergone widespread demolition and redevelopment in recent decades. However, there remains a lack of systematic evaluation of whether the demolished land has been effectively reused, raising concerns about the efficacy and sustainability of current redevelopment practices. To address the gap, this study proposes a deep learning-based framework to monitor the spatiotemporal changes of UVs in China. Specifically, semantic segmentation of multi-temporal remote sensing imagery is first used to map evolving UV boundaries, and then post-demolition land use is classified into six categories based on the "remained-demolished-redeveloped" phase: incomplete demolition, vacant land, construction sites, buildings, green spaces, and others. Four representative cities from China's four economic regions were selected as the study areas, i.e., Guangzhou (East), Zhengzhou (Central), Xi'an (West), and Harbin (Northeast). The results indicate: 1) UV redevelopment processes were frequently prolonged; 2) redevelopment transitions primarily occurred in peripheral areas, whereas urban cores remained relatively stable; and 3) three spatiotemporal transformation pathways, i.e., synchronized redevelopment, delayed redevelopment, and gradual optimization, were revealed. This study highlights the fragmented, complex and nonlinear nature of UV redevelopment, underscoring the need for tiered and context-sensitive planning strategies. By linking spatial dynamics with the context of redevelopment policies, the findings offer valuable empirical insights that support more inclusive, efficient, and sustainable urban renewal, while also contributing to a broader global understanding of informal settlement transformations.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13458v1" target="_blank" rel="noopener noreferrer">
                视觉语言模型中的信任：来自参与式用户工作坊的洞见
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Trust in Vision-Language Models: Insights from a Participatory User Workshop
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Agnese Chiatti, Lara Piccolo, Sara Bernardini, Matteo Matteucci, Viola Schiaffon...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于用户对视觉语言模型的信任问题，这属于伦理、用户体验等非技术性话题，明确列在无关主题中。虽然标题涉及视觉语言模型，但核心关注的是信任这一社会技术因素，而非模型架构、效率或实际应用，因此与当前技术焦点完全无关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 15:04:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13458v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13458v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.HC</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the growing deployment of Vision-Language Models (VLMs), pre-trained on large image-text and video-text datasets, it is critical to equip users with the tools to discern when to trust these systems. However, examining how user trust in VLMs builds and evolves remains an open problem. This problem is exacerbated by the increasing reliance on AI models as judges for experimental validation, to bypass the cost and implications of running participatory design studies directly with users. Following a user-centred approach, this paper presents preliminary results from a workshop with prospective VLM users. Insights from this pilot workshop inform future studies aimed at contextualising trust metrics and strategies for participants' engagement to fit the case of user-VLM interaction.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13431v1" target="_blank" rel="noopener noreferrer">
                FUSE：一种基于流的形状间映射方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FUSE: A Flow-based Mapping Between Shapes
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lorenzo Olearo, Giulio Viganò, Daniele Baieri, Filippo Maggioli, Simone Melzi
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题涉及计算机图形学中的形状映射问题，专注于几何处理和3D形状分析。这与推荐系统、搜索或广告的核心技术领域没有明显关联，也不涉及LLM、Transformer架构或异构数据建模等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:42:45
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13431v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13431v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce a novel neural representation for maps between 3D shapes based on flow-matching models, which is computationally efficient and supports cross-representation shape matching without large-scale training or data-driven procedures. 3D shapes are represented as the probability distribution induced by a continuous and invertible flow mapping from a fixed anchor distribution. Given a source and a target shape, the composition of the inverse flow (source to anchor) with the forward flow (anchor to target), we continuously map points between the two surfaces. By encoding the shapes with a pointwise task-tailored embedding, this construction provides an invertible and modality-agnostic representation of maps between shapes across point clouds, meshes, signed distance fields (SDFs), and volumetric data. The resulting representation consistently achieves high coverage and accuracy across diverse benchmarks and challenging settings in shape matching. Beyond shape matching, our framework shows promising results in other tasks, including UV mapping and registration of raw point cloud scans of human bodies.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13420v1" target="_blank" rel="noopener noreferrer">
                VOPE：重新审视视觉语言模型在自愿想象任务中的幻觉问题
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            VOPE: Revisiting Hallucination of Vision-Language Models in Voluntary Imagination Task
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xingming Long, Jie Zhang, Shiguang Shan, Xilin Chen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言模型的幻觉问题，这属于纯粹的NLP中心主题，明确列在无关主题中。论文标题表明其研究的是VLM在想象任务中的幻觉评估，与推荐系统、搜索或广告的核心技术进展没有直接关联，也没有展示出在相关领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:32:06
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13420v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13420v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Most research on hallucinations in Large Vision-Language Models (LVLMs) focuses on factual description tasks that prohibit any output absent from the image. However, little attention has been paid to hallucinations in voluntary imagination tasks, e.g., story writing, where the models are expected to generate novel content beyond the given image. In these tasks, it is inappropriate to simply regard such imagined novel content as hallucinations. To address this limitation, we introduce Voluntary-imagined Object Presence Evaluation (VOPE)-a novel method to assess LVLMs' hallucinations in voluntary imagination tasks via presence evaluation. Specifically, VOPE poses recheck-based questions to evaluate how an LVLM interprets the presence of the imagined objects in its own response. The consistency between the model's interpretation and the object's presence in the image is then used to determine whether the model hallucinates when generating the response. We apply VOPE to several mainstream LVLMs and hallucination mitigation methods, revealing two key findings: (1) most LVLMs hallucinate heavily during voluntary imagination, and their performance in presence evaluation is notably poor on imagined objects; (2) existing hallucination mitigation methods show limited effect in voluntary imagination tasks, making this an important direction for future research.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13417v1" target="_blank" rel="noopener noreferrer">
                任意分割流：基于任意数据源的快速国家级田地边界检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Delineate Anything Flow: Fast, Country-Level Field Boundary Detection from Any Source
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mykola Lavreniuk, Nataliia Kussul, Andrii Shelestov, Yevhenii Salii, Volodymyr K...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机视觉中的边界检测任务，主要应用于地理空间和农业领域。虽然涉及大规模数据处理，但缺乏与推荐系统、搜索或广告的直接关联，也不涉及LLM、Transformer架构或异构数据建模等核心技术。该研究属于纯粹的视觉应用范畴，与我的技术焦点无关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:30:43
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13417v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13417v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Accurate delineation of agricultural field boundaries from satellite imagery is essential for land management and crop monitoring, yet existing methods often produce incomplete boundaries, merge adjacent fields, and struggle to scale. We present the Delineate Anything Flow (DelAnyFlow) methodology, a resolution-agnostic approach for large-scale field boundary mapping. DelAnyFlow combines the DelAny instance segmentation model, based on a YOLOv11 backbone and trained on the large-scale Field Boundary Instance Segmentation-22M (FBIS 22M) dataset, with a structured post-processing, merging, and vectorization sequence to generate topologically consistent vector boundaries. FBIS 22M, the largest dataset of its kind, contains 672,909 multi-resolution image patches (0.25-10m) and 22.9million validated field instances. The DelAny model delivers state-of-the-art accuracy with over 100% higher mAP and 400x faster inference than SAM2. DelAny demonstrates strong zero-shot generalization and supports national-scale applications: using Sentinel 2 data for 2024, DelAnyFlow generated a complete field boundary layer for Ukraine (603,000km2) in under six hours on a single workstation. DelAnyFlow outputs significantly improve boundary completeness relative to operational products from Sinergise Solutions and NASA Harvest, particularly in smallholder and fragmented systems (0.25-1ha). For Ukraine, DelAnyFlow delineated 3.75M fields at 5m and 5.15M at 2.5m, compared to 2.66M detected by Sinergise Solutions and 1.69M by NASA Harvest. This work delivers a scalable, cost-effective methodology for field delineation in regions lacking digital cadastral data. A project landing page with links to model weights, code, national-scale vector outputs, and dataset is available at https://lavreniuk.github.io/Delineate-Anything/.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13399v1" target="_blank" rel="noopener noreferrer">
                TripleFDS：用于场景文本编辑的三重特征解缠与合成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TripleFDS: Triple Feature Disentanglement and Synthesis for Scene Text Editing
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuchen Bao, Yiting Wang, Wenjian Huang, Haowei Wang, Shen Chen, Taiping Yao, Sho...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于场景文本编辑的计算机视觉任务，涉及图像中文本的修改和合成。虽然标题提到特征解缠和合成，但这属于纯粹的视觉处理领域，与推荐系统、搜索或广告中的排名、检索或用户建模没有直接关联。该技术没有明显的应用场景可以转化到RecSys/Search/Ads领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:15:03
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13399v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13399v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Scene Text Editing (STE) aims to naturally modify text in images while preserving visual consistency, the decisive factors of which can be divided into three parts, i.e., text style, text content, and background. Previous methods have struggled with incomplete disentanglement of editable attributes, typically addressing only one aspect - such as editing text content - thus limiting controllability and visual consistency. To overcome these limitations, we propose TripleFDS, a novel framework for STE with disentangled modular attributes, and an accompanying dataset called SCB Synthesis. SCB Synthesis provides robust training data for triple feature disentanglement by utilizing the "SCB Group", a novel construct that combines three attributes per image to generate diverse, disentangled training groups. Leveraging this construct as a basic training unit, TripleFDS first disentangles triple features, ensuring semantic accuracy through inter-group contrastive regularization and reducing redundancy through intra-sample multi-feature orthogonality. In the synthesis phase, TripleFDS performs feature remapping to prevent "shortcut" phenomena during reconstruction and mitigate potential feature leakage. Trained on 125,000 SCB Groups, TripleFDS achieves state-of-the-art image fidelity (SSIM of 44.54) and text accuracy (ACC of 93.58%) on the mainstream STE benchmarks. Besides superior performance, the more flexible editing of TripleFDS supports new operations such as style replacement and background transfer. Code: https://github.com/yusenbao01/TripleFDS
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13397v1" target="_blank" rel="noopener noreferrer">
                描述符：距离标注的交通感知问答（DTPQA）
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Descriptor: Distance-Annotated Traffic Perception Question Answering (DTPQA)
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Nikos Theodoridis, Tim Brophy, Reenu Mohandas, Ganesh Sistu, Fiachra Collins, An...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确指向交通感知领域的问答系统，属于计算机视觉与自动驾驶的特定应用场景。虽然涉及问答技术，但其核心关注交通感知和距离标注，与推荐系统、搜索或广告领域没有直接关联，也不涉及LLM或Transformer架构在RecSys/Search/Ads中的潜在应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 14:12:22
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13397v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13397v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The remarkable progress of Vision-Language Models (VLMs) on a variety of tasks has raised interest in their application to automated driving. However, for these models to be trusted in such a safety-critical domain, they must first possess robust perception capabilities, i.e., they must be capable of understanding a traffic scene, which can often be highly complex, with many things happening simultaneously. Moreover, since critical objects and agents in traffic scenes are often at long distances, we require systems with not only strong perception capabilities at close distances (up to 20 meters), but also at long (30+ meters) range. Therefore, it is important to evaluate the perception capabilities of these models in isolation from other skills like reasoning or advanced world knowledge. Distance-Annotated Traffic Perception Question Answering (DTPQA) is a Visual Question Answering (VQA) benchmark designed specifically for this purpose: it can be used to evaluate the perception systems of VLMs in traffic scenarios using trivial yet crucial questions relevant to driving decisions. It consists of two parts: a synthetic benchmark (DTP-Synthetic) created using a simulator, and a real-world benchmark (DTP-Real) built on top of existing images of real traffic scenes. Additionally, DTPQA includes distance annotations, i.e., how far the object in question is from the camera. More specifically, each DTPQA sample consists of (at least): (a) an image, (b) a question, (c) the ground truth answer, and (d) the distance of the object in question, enabling analysis of how VLM performance degrades with increasing object distance. In this article, we provide the dataset itself along with the Python scripts used to create it, which can be used to generate additional data of the same kind.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13353v1" target="_blank" rel="noopener noreferrer">
                用于眼底图像可解释质量评估的半监督多任务学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Semi-Supervised Multi-Task Learning for Interpretable Quality As- sessment of Fundus Images
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lucas Gabriel Telesco, Danila Nejamkin, Estefanía Mata, Francisco Filizzola, Kev...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像（眼底图像）的质量评估，属于医疗领域特定应用。虽然涉及多任务学习技术，但其应用场景和问题定义完全局限于医疗影像分析，与推荐系统、搜索或广告领域没有任何直接或潜在的关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:17:42
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13353v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13353v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Retinal image quality assessment (RIQA) supports computer-aided diagnosis of eye diseases. However, most tools classify only overall image quality, without indicating acquisition defects to guide recapture. This gap is mainly due to the high cost of detailed annotations. In this paper, we aim to mitigate this limitation by introducing a hybrid semi-supervised learning approach that combines manual labels for overall quality with pseudo-labels of quality details within a multi-task framework. Our objective is to obtain more interpretable RIQA models without requiring extensive manual labeling. Pseudo-labels are generated by a Teacher model trained on a small dataset and then used to fine-tune a pre-trained model in a multi-task setting. Using a ResNet-18 backbone, we show that these weak annotations improve quality assessment over single-task baselines (F1: 0.875 vs. 0.863 on EyeQ, and 0.778 vs. 0.763 on DeepDRiD), matching or surpassing existing methods. The multi-task model achieved performance statistically comparable to the Teacher for most detail prediction tasks (p > 0.05). In a newly annotated EyeQ subset released with this paper, our model performed similarly to experts, suggesting that pseudo-label noise aligns with expert variability. Our main finding is that the proposed semi-supervised approach not only improves overall quality assessment but also provides interpretable feedback on capture conditions (illumination, clarity, contrast). This enhances interpretability at no extra manual labeling cost and offers clinically actionable outputs to guide image recapture.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13344v1" target="_blank" rel="noopener noreferrer">
                YOLO与专家混合模型相遇：面向鲁棒目标检测的自适应专家路由
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            YOLO Meets Mixture-of-Experts: Adaptive Expert Routing for Robust Object Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ori Meiraz, Sharon Shalev, Avishai Weizman
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于计算机视觉中的目标检测，结合YOLO架构与专家混合(MoE)技术进行自适应路由。虽然涉及MoE这一Transformer相关技术，但论文明确针对视觉目标检测任务，没有展示与推荐系统、搜索或广告领域的潜在应用关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 13:11:11
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13344v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13344v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper presents a novel Mixture-of-Experts framework for object detection, incorporating adaptive routing among multiple YOLOv9-T experts to enable dynamic feature specialization and achieve higher mean Average Precision (mAP) and Average Recall (AR) compared to a single YOLOv9-T model.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13309v1" target="_blank" rel="noopener noreferrer">
                DriveLiDAR4D：用于自动驾驶的顺序可控LiDAR场景生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DriveLiDAR4D: Sequential and Controllable LiDAR Scene Generation for Autonomous Driving
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kaiwen Cai, Xinze Liu, Xia Zhou, Hengtong Hu, Jie Xiang, Luyao Zhang, Xueyang Zh...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域的LiDAR数据生成，属于纯粹的计算机视觉和3D视觉应用。虽然涉及序列生成技术，但缺乏与推荐系统、搜索或广告领域的明确关联。论文的技术内容与用户行为建模、内容排序或广告投放等核心业务场景没有直接联系。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:43:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13309v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13309v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The generation of realistic LiDAR point clouds plays a crucial role in the development and evaluation of autonomous driving systems. Although recent methods for 3D LiDAR point cloud generation have shown significant improvements, they still face notable limitations, including the lack of sequential generation capabilities and the inability to produce accurately positioned foreground objects and realistic backgrounds. These shortcomings hinder their practical applicability. In this paper, we introduce DriveLiDAR4D, a novel LiDAR generation pipeline consisting of multimodal conditions and a novel sequential noise prediction model LiDAR4DNet, capable of producing temporally consistent LiDAR scenes with highly controllable foreground objects and realistic backgrounds. To the best of our knowledge, this is the first work to address the sequential generation of LiDAR scenes with full scene manipulation capability in an end-to-end manner. We evaluated DriveLiDAR4D on the nuScenes and KITTI datasets, where we achieved an FRD score of 743.13 and an FVD score of 16.96 on the nuScenes dataset, surpassing the current state-of-the-art (SOTA) method, UniScene, with an performance boost of 37.2% in FRD and 24.1% in FVD, respectively.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13306v1" target="_blank" rel="noopener noreferrer">
                DAP：一种用于自动驾驶的离散令牌自回归规划器
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DAP: A Discrete-token Autoregressive Planner for Autonomous Driving
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Bowen Ye, Bin Zhang, Hang Zhao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域，这是一个与推荐系统、搜索或广告完全无关的垂直应用领域。论文标题中提到的离散令牌自回归规划器技术虽然可能涉及序列建模，但其应用场景和核心技术焦点与我的关注领域没有任何直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:31:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13306v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13306v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Gaining sustainable performance improvement with scaling data and model budget remains a pivotal yet unresolved challenge in autonomous driving. While autoregressive models exhibited promising data-scaling efficiency in planning tasks, predicting ego trajectories alone suffers sparse supervision and weakly constrains how scene evolution should shape ego motion. Therefore, we introduce DAP, a discrete-token autoregressive planner that jointly forecasts BEV semantics and ego trajectories, thereby enforcing comprehensive representation learning and allowing predicted dynamics to directly condition ego motion. In addition, we incorporate a reinforcement-learning-based fine-tuning, which preserves supervised behavior cloning priors while injecting reward-guided improvements. Despite a compact 160M parameter budget, DAP achieves state-of-the-art performance on open-loop metrics and delivers competitive closed-loop results on the NAVSIM benchmark. Overall, the fully discrete-token autoregressive formulation operating on both rasterized BEV and ego actions provides a compact yet scalable planning paradigm for autonomous driving.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13297v1" target="_blank" rel="noopener noreferrer">
                CorrectAD：一种用于改进自动驾驶端到端规划的自校正代理系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CorrectAD: A Self-Correcting Agentic System to Improve End-to-end Planning in Autonomous Driving
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Enhui Ma, Lijun Zhou, Tao Tang, Jiahuan Zhang, Junpeng Jiang, Zhan Zhang, Dong H...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域的规划系统，属于特定领域应用。虽然提到了代理系统，但其核心应用场景（自动驾驶）与推荐系统、搜索或广告领域没有直接关联。论文内容不涉及LLM技术、Transformer架构进展，也没有展示在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:21:03
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13297v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13297v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    End-to-end planning methods are the de facto standard of the current autonomous driving system, while the robustness of the data-driven approaches suffers due to the notorious long-tail problem (i.e., rare but safety-critical failure cases). In this work, we explore whether recent diffusion-based video generation methods (a.k.a. world models), paired with structured 3D layouts, can enable a fully automated pipeline to self-correct such failure cases. We first introduce an agent to simulate the role of product manager, dubbed PM-Agent, which formulates data requirements to collect data similar to the failure cases. Then, we use a generative model that can simulate both data collection and annotation. However, existing generative models struggle to generate high-fidelity data conditioned on 3D layouts. To address this, we propose DriveSora, which can generate spatiotemporally consistent videos aligned with the 3D annotations requested by PM-Agent. We integrate these components into our self-correcting agentic system, CorrectAD. Importantly, our pipeline is an end-to-end model-agnostic and can be applied to improve any end-to-end planner. Evaluated on both nuScenes and a more challenging in-house dataset across multiple end-to-end planners, CorrectAD corrects 62.5% and 49.8% of failure cases, reducing collision rates by 39% and 27%, respectively.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13285v1" target="_blank" rel="noopener noreferrer">
                SkyReels-Text：面向海报设计的细粒度字体可控文本编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SkyReels-Text: Fine-grained Font-Controllable Text Editing for Poster Design
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yunjie Yu, Jingchen Wu, Junchen Zhu, Chunze Lin, Guibin Chen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于海报设计中的文本编辑和字体控制，属于内容生成和AIGC领域。这与我的关注点（推荐系统、搜索、广告中的核心算法进展、LLM技术应用）完全无关，属于明确的无关主题范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:02:52
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13285v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13285v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Artistic design such as poster design often demands rapid yet precise modification of textual content while preserving visual harmony and typographic intent, especially across diverse font styles. Although modern image editing models have grown increasingly powerful, they still fall short in fine-grained, font-aware text manipulation, limiting their utility in professional design workflows such as poster editing. To address this issue, we present SkyReels-Text, a novel font-controllable framework for precise poster text editing. Our method enables simultaneous editing of multiple text regions, each rendered in distinct typographic styles, while preserving the visual appearance of non-edited regions. Notably, our model requires neither font labels nor fine-tuning during inference: users can simply provide cropped glyph patches corresponding to their desired typography, even if the font is not included in any standard library. Extensive experiments on multiple datasets, including handwrittent text benchmarks, SkyReels-Text achieves state-of-the-art performance in both text fidelity and visual realism, offering unprecedented control over font families, and stylistic nuances. This work bridges the gap between general-purpose image editing and professional-grade typographic design.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13282v1" target="_blank" rel="noopener noreferrer">
                通过联合优化相机空间中的人群实现度量感知的多人物网格恢复
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kaiwen Wang, Kaili Zheng, Yiming Shi, Chenyi Guo, Ji Wu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机视觉中的3D人体网格恢复和人群优化，属于纯粹的视觉技术领域。虽然提到了多人物场景，但没有展示与推荐系统、搜索或广告的明确关联。该技术主要面向人体姿态估计和3D重建应用，在我的关注领域中缺乏直接相关性。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 12:00:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13282v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13282v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multi-person human mesh recovery from a single image is a challenging task, hindered by the scarcity of in-the-wild training data. Prevailing in-the-wild human mesh pseudo-ground-truth (pGT) generation pipelines are single-person-centric, where each human is processed individually without joint optimization. This oversight leads to a lack of scene-level consistency, producing individuals with conflicting depths and scales within the same image. To address this, we introduce Depth-conditioned Translation Optimization (DTO), a novel optimization-based method that jointly refines the camera-space translations of all individuals in a crowd. By leveraging anthropometric priors on human height and depth cues from a monocular depth estimator, DTO solves for a scene-consistent placement of all subjects within a principled Maximum a posteriori (MAP) framework. Applying DTO to the 4D-Humans dataset, we construct DTO-Humans, a new large-scale pGT dataset of 0.56M high-quality, scene-consistent multi-person images, featuring dense crowds with an average of 4.8 persons per image. Furthermore, we propose Metric-Aware HMR, an end-to-end network that directly estimates human mesh and camera parameters in metric scale. This is enabled by a camera branch and a novel relative metric loss that enforces plausible relative scales. Extensive experiments demonstrate that our method achieves state-of-the-art performance on relative depth reasoning and human mesh recovery. Code and data will be released publicly.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13278v1" target="_blank" rel="noopener noreferrer">
                SF-Recon：基于3D高斯泼溅的无简化轻量级建筑重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SF-Recon: Simplification-Free Lightweight Building Reconstruction via 3D Gaussian Splatting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zihan Li, Tengfei Wang, Wentian Gan, Hao Zhan, Xin Wang, Zongqian Zhan
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D建筑重建的计算机视觉任务，属于纯粹的3D视觉领域。虽然3D高斯泼溅是一种新兴的渲染技术，但该工作主要解决建筑几何重建问题，与推荐系统、搜索或广告的核心技术栈没有直接关联，也没有展示在异构数据处理或Transformer架构方面的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:50:52
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13278v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13278v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Lightweight building surface models are crucial for digital city, navigation, and fast geospatial analytics, yet conventional multi-view geometry pipelines remain cumbersome and quality-sensitive due to their reliance on dense reconstruction, meshing, and subsequent simplification. This work presents SF-Recon, a method that directly reconstructs lightweight building surfaces from multi-view images without post-hoc mesh simplification. We first train an initial 3D Gaussian Splatting (3DGS) field to obtain a view-consistent representation. Building structure is then distilled by a normal-gradient-guided Gaussian optimization that selects primitives aligned with roof and wall boundaries, followed by multi-view edge-consistency pruning to enhance structural sharpness and suppress non-structural artifacts without external supervision. Finally, a multi-view depth-constrained Delaunay triangulation converts the structured Gaussian field into a lightweight, structurally faithful building mesh. Based on a proposed SF dataset, the experimental results demonstrate that our SF-Recon can directly reconstruct lightweight building models from multi-view imagery, achieving substantially fewer faces and vertices while maintaining computational efficiency. Website:https://lzh282140127-cell.github.io/SF-Recon-project/
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13264v1" target="_blank" rel="noopener noreferrer">
                SymGS：利用局部对称性进行3D高斯泼溅压缩
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SymGS : Leveraging Local Symmetries for 3D Gaussian Splatting Compression
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Keshav Gupta, Akshat Sanghvi, Shreyas Reddy Palley, Astitva Srivastava, Charu Sh...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D高斯泼溅压缩技术，属于计算机图形学和3D视觉领域。虽然3D高斯泼溅在3D重建中有应用，但该论文主要关注压缩技术本身，与推荐系统、搜索或广告的核心技术没有直接关联。标题中提到的局部对称性压缩方法在推荐、搜索或广告领域缺乏明确的潜在应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 11:26:09
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13264v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13264v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.GR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D Gaussian Splatting has emerged as a transformative technique in novel view synthesis, primarily due to its high rendering speed and photorealistic fidelity. However, its memory footprint scales rapidly with scene complexity, often reaching several gigabytes. Existing methods address this issue by introducing compression strategies that exploit primitive-level redundancy through similarity detection and quantization. We aim to surpass the compression limits of such methods by incorporating symmetry-aware techniques, specifically targeting mirror symmetries to eliminate redundant primitives. We propose a novel compression framework, \textbf{\textit{SymGS}}, introducing learnable mirrors into the scene, thereby eliminating local and global reflective redundancies for compression. Our framework functions as a plug-and-play enhancement to state-of-the-art compression methods, (e.g. HAC) to achieve further compression. Compared to HAC, we achieve $1.66 \times$ compression across benchmark datasets (upto $3\times$ on large-scale scenes). On an average, SymGS enables $\bf{108\times}$ compression of a 3DGS scene, while preserving rendering quality. The project page and supplementary can be found at \textbf{\color{cyan}{symgs.github.io}}
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13232v1" target="_blank" rel="noopener noreferrer">
                MRIQT：用于新生儿超低场磁共振成像图像质量迁移的物理感知扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MRIQT: Physics-Aware Diffusion Model for Image Quality Transfer in Neonatal Ultra-Low-Field MRI
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Malek Al Abed, Sebiha Demir, Anne Groteklaes, Elodie Germani, Shahrooz Faghihroo...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像领域的超低场磁共振成像质量迁移，属于明确的医学应用范畴。虽然使用了扩散模型技术，但论文内容完全围绕新生儿MRI这一特定医疗场景，与推荐系统、搜索或广告领域没有任何关联，也不具备在这些领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:51:11
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13232v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13232v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Portable ultra-low-field MRI (uLF-MRI, 0.064 T) offers accessible neuroimaging for neonatal care but suffers from low signal-to-noise ratio and poor diagnostic quality compared to high-field (HF) MRI. We propose MRIQT, a 3D conditional diffusion framework for image quality transfer (IQT) from uLF to HF MRI. MRIQT combines realistic K-space degradation for physics-consistent uLF simulation, v-prediction with classifier-free guidance for stable image-to-image generation, and an SNR-weighted 3D perceptual loss for anatomical fidelity. The model denoises from a noised uLF input conditioned on the same scan, leveraging volumetric attention-UNet architecture for structure-preserving translation. Trained on a neonatal cohort with diverse pathologies, MRIQT surpasses recent GAN and CNN baselines in PSNR 15.3% with 1.78% over the state of the art, while physicians rated 85% of its outputs as good quality with clear pathology present. MRIQT enables high-fidelity, diffusion-based enhancement of portable ultra-low-field (uLF) MRI for deliable neonatal brain assessment.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13222v1" target="_blank" rel="noopener noreferrer">
                混合域自适应表征学习用于视线估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Hybrid-Domain Adaptative Representation Learning for Gaze Estimation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Qida Tan, Hongyu Yang, Wenchao Du
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的视线估计任务，属于纯粹的视觉技术研究。虽然涉及表征学习和域自适应技术，但视线估计在搜索、推荐或广告系统中缺乏直接应用场景，无法建立与推荐系统核心业务的明确联系。该工作主要服务于人机交互、行为分析等视觉应用领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:38:50
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13222v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13222v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Appearance-based gaze estimation, aiming to predict accurate 3D gaze direction from a single facial image, has made promising progress in recent years. However, most methods suffer significant performance degradation in cross-domain evaluation due to interference from gaze-irrelevant factors, such as expressions, wearables, and image quality. To alleviate this problem, we present a novel Hybrid-domain Adaptative Representation Learning (shorted by HARL) framework that exploits multi-source hybrid datasets to learn robust gaze representation. More specifically, we propose to disentangle gaze-relevant representation from low-quality facial images by aligning features extracted from high-quality near-eye images in an unsupervised domain-adaptation manner, which hardly requires any computational or inference costs. Additionally, we analyze the effect of head-pose and design a simple yet efficient sparse graph fusion module to explore the geometric constraint between gaze direction and head-pose, leading to a dense and robust gaze representation. Extensive experiments on EyeDiap, MPIIFaceGaze, and Gaze360 datasets demonstrate that our approach achieves state-of-the-art accuracy of $\textbf{5.02}^{\circ}$ and $\textbf{3.36}^{\circ}$, and $\textbf{9.26}^{\circ}$ respectively, and present competitive performances through cross-dataset evaluation. The code is available at https://github.com/da60266/HARL.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13197v1" target="_blank" rel="noopener noreferrer">
                自监督超声屏幕检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Self-Supervised Ultrasound Screen Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Alberto Gomez, Jorge Oliveira, Ramon Casero, Agis Chartsias
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学超声图像的屏幕检测，属于医疗领域的计算机视觉应用。这与推荐系统、搜索或广告的核心技术领域完全无关，也不涉及LLM、Transformer架构或异构数据建模等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:08:23
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13197v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13197v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Ultrasound (US) machines display images on a built-in monitor, but routine transfer to hospital systems relies on DICOM. We propose a self-supervised pipeline to extract the US image from a photograph of the monitor. This removes the DICOM bottleneck and enables rapid testing and prototyping of new algorithms. In a proof-of-concept study, the rectified images retained enough visual fidelity to classify cardiac views with a balanced accuracy of 0.79 with respect to the native DICOMs.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13195v1" target="_blank" rel="noopener noreferrer">
                难度感知标签引导去噪的单目3D目标检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Difficulty-Aware Label-Guided Denoising for Monocular 3D Object Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Soyul Lee, Seungmin Baek, Dongbo Min
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的单目3D目标检测，属于纯粹的视觉技术研究。虽然提到了去噪和标签引导等概念，但这些技术主要针对视觉感知任务，与推荐系统、搜索或广告的排名和建模需求没有直接关联。论文内容不涉及任何文本处理、用户行为建模或推荐算法等核心关注领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 10:02:18
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13195v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13195v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Monocular 3D object detection is a cost-effective solution for applications like autonomous driving and robotics, but remains fundamentally ill-posed due to inherently ambiguous depth cues. Recent DETR-based methods attempt to mitigate this through global attention and auxiliary depth prediction, yet they still struggle with inaccurate depth estimates. Moreover, these methods often overlook instance-level detection difficulty, such as occlusion, distance, and truncation, leading to suboptimal detection performance. We propose MonoDLGD, a novel Difficulty-Aware Label-Guided Denoising framework that adaptively perturbs and reconstructs ground-truth labels based on detection uncertainty. Specifically, MonoDLGD applies stronger perturbations to easier instances and weaker ones into harder cases, and then reconstructs them to effectively provide explicit geometric supervision. By jointly optimizing label reconstruction and 3D object detection, MonoDLGD encourages geometry-aware representation learning and improves robustness to varying levels of object complexity. Extensive experiments on the KITTI benchmark demonstrate that MonoDLGD achieves state-of-the-art performance across all difficulty levels.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13191v1" target="_blank" rel="noopener noreferrer">
                绘画的诞生：可微分笔触重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Birth of a Painting: Differentiable Brushstroke Reconstruction
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ying Jiang, Jiayin Lu, Yunuo Chen, Yumeng He, Kui Wu, Yin Yang, Chenfanfu Jiang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机图形学中的笔触重建和绘画生成技术，属于纯粹的视觉内容生成领域。虽然涉及可微分渲染技术，但没有任何与推荐系统、搜索或广告相关的潜在应用。该研究主要服务于数字艺术创作和图形学应用，完全偏离了当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:55:53
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13191v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13191v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Painting embodies a unique form of visual storytelling, where the creation process is as significant as the final artwork. Although recent advances in generative models have enabled visually compelling painting synthesis, most existing methods focus solely on final image generation or patch-based process simulation, lacking explicit stroke structure and failing to produce smooth, realistic shading. In this work, we present a differentiable stroke reconstruction framework that unifies painting, stylized texturing, and smudging to faithfully reproduce the human painting-smudging loop. Given an input image, our framework first optimizes single- and dual-color Bezier strokes through a parallel differentiable paint renderer, followed by a style generation module that synthesizes geometry-conditioned textures across diverse painting styles. We further introduce a differentiable smudge operator to enable natural color blending and shading. Coupled with a coarse-to-fine optimization strategy, our method jointly optimizes stroke geometry, color, and texture under geometric and semantic guidance. Extensive experiments on oil, watercolor, ink, and digital paintings demonstrate that our approach produces realistic and expressive stroke reconstructions, smooth tonal transitions, and richly stylized appearances, offering a unified model for expressive digital painting creation. See our project page for more demos: https://yingjiang96.github.io/DiffPaintWebsite/.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13183v1" target="_blank" rel="noopener noreferrer">
                GenTract：生成式全局纤维束追踪
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            GenTract: Generative Global Tractography
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Alec Sargood, Lemuel Puglisi, Elinor Thompson, Mirco Musolesi, Daniel C. Alexand...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明其专注于医学影像领域的扩散磁共振成像纤维束追踪技术，这属于医学/生物学应用范畴。虽然标题中包含'生成式'一词，但该技术专门用于脑白质纤维束重建，与推荐系统、搜索或广告没有任何直接或间接的关联潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:43:57
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13183v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13183v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Tractography is the process of inferring the trajectories of white-matter pathways in the brain from diffusion magnetic resonance imaging (dMRI). Local tractography methods, which construct streamlines by following local fiber orientation estimates stepwise through an image, are prone to error accumulation and high false positive rates, particularly on noisy or low-resolution data. In contrast, global methods, which attempt to optimize a collection of streamlines to maximize compatibility with underlying fiber orientation estimates, are computationally expensive. To address these challenges, we introduce GenTract, the first generative model for global tractography. We frame tractography as a generative task, learning a direct mapping from dMRI to complete, anatomically plausible streamlines. We compare both diffusion-based and flow matching paradigms and evaluate GenTract's performance against state-of-the-art baselines. Notably, GenTract achieves precision 2.1x higher than the next-best method, TractOracle. This advantage becomes even more pronounced in challenging low-resolution and noisy settings, where it outperforms the closest competitor by an order of magnitude. By producing tractograms with high precision on research-grade data while also maintaining reliability on imperfect, lower-resolution data, GenTract represents a promising solution for global tractography.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13175v1" target="_blank" rel="noopener noreferrer">
                HDW-SR：基于小波分解的高频引导扩散模型用于图像超分辨率
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            HDW-SR: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chao Yang, Boqian Zhang, Jinghao Xu, Guang Jiang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像超分辨率技术，使用小波分解和扩散模型来处理图像高频分量。虽然扩散模型是生成模型的一种，但该工作纯粹针对视觉图像处理，没有展示与推荐系统、搜索或广告的潜在应用关联，完全属于被排除的视觉领域研究。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:25:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13175v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13175v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Diffusion-based methods have shown great promise in single image super-resolution (SISR); however, existing approaches often produce blurred fine details due to insufficient guidance in the high-frequency domain. To address this issue, we propose a High-Frequency Guided Diffusion Network based on Wavelet Decomposition (HDW-SR), which replaces the conventional U-Net backbone in diffusion frameworks. Specifically, we perform diffusion only on the residual map, allowing the network to focus more effectively on high-frequency information restoration. We then introduce wavelet-based downsampling in place of standard CNN downsampling to achieve multi-scale frequency decomposition, enabling sparse cross-attention between the high-frequency subbands of the pre-super-resolved image and the low-frequency subbands of the diffused image for explicit high-frequency guidance. Moreover, a Dynamic Thresholding Block (DTB) is designed to refine high-frequency selection during the sparse attention process. During upsampling, the invertibility of the wavelet transform ensures low-loss feature reconstruction. Experiments on both synthetic and real-world datasets demonstrate that HDW-SR achieves competitive super-resolution performance, excelling particularly in recovering fine-grained image details. The code will be available after acceptance.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13170v1" target="_blank" rel="noopener noreferrer">
                THIR：基于拓扑结构的病理图像检索
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            THIR: Topological Histopathological Image Retrieval
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zahra Tabatabaei, Jon Sporring
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1"></p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:18:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13170v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13170v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    According to the World Health Organization, breast cancer claimed the lives of approximately 685,000 women in 2020. Early diagnosis and accurate clinical decision making are critical in reducing this global burden. In this study, we propose THIR, a novel Content-Based Medical Image Retrieval (CBMIR) framework that leverages topological data analysis specifically, Betti numbers derived from persistent homology to characterize and retrieve histopathological images based on their intrinsic structural patterns. Unlike conventional deep learning approaches that rely on extensive training, annotated datasets, and powerful GPU resources, THIR operates entirely without supervision. It extracts topological fingerprints directly from RGB histopathological images using cubical persistence, encoding the evolution of loops as compact, interpretable feature vectors. The similarity retrieval is then performed by computing the distances between these topological descriptors, efficiently returning the top-K most relevant matches. Extensive experiments on the BreaKHis dataset demonstrate that THIR outperforms state of the art supervised and unsupervised methods. It processes the entire dataset in under 20 minutes on a standard CPU, offering a fast, scalable, and training free solution for clinical image retrieval.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13168v1" target="_blank" rel="noopener noreferrer">
                SOMA：面向SAR-光学图像配准的特征梯度增强仿射流匹配方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SOMA: Feature Gradient Enhanced Affine-Flow Matching for SAR-Optical Registration
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haodong Wang, Tao Zhuo, Xiuwei Zhang, Hanlin Yin, Wencong Wu, Yanning Zhang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于SAR-光学图像配准这一计算机视觉特定领域，属于遥感图像处理范畴。虽然涉及特征匹配技术，但缺乏与推荐系统、搜索或广告领域的直接关联，也没有展示在异构数据处理方面的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 09:14:56
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13168v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13168v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Achieving pixel-level registration between SAR and optical images remains a challenging task due to their fundamentally different imaging mechanisms and visual characteristics. Although deep learning has achieved great success in many cross-modal tasks, its performance on SAR-Optical registration tasks is still unsatisfactory. Gradient-based information has traditionally played a crucial role in handcrafted descriptors by highlighting structural differences. However, such gradient cues have not been effectively leveraged in deep learning frameworks for SAR-Optical image matching. To address this gap, we propose SOMA, a dense registration framework that integrates structural gradient priors into deep features and refines alignment through a hybrid matching strategy. Specifically, we introduce the Feature Gradient Enhancer (FGE), which embeds multi-scale, multi-directional gradient filters into the feature space using attention and reconstruction mechanisms to boost feature distinctiveness. Furthermore, we propose the Global-Local Affine-Flow Matcher (GLAM), which combines affine transformation and flow-based refinement within a coarse-to-fine architecture to ensure both structural consistency and local accuracy. Experimental results demonstrate that SOMA significantly improves registration precision, increasing the CMR@1px by 12.29% on the SEN1-2 dataset and 18.50% on the GFGE_SO dataset. In addition, SOMA exhibits strong robustness and generalizes well across diverse scenes and resolutions.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13150v1" target="_blank" rel="noopener noreferrer">
                骨架比文本更响亮：一种面向视频行人重识别的运动感知预训练范式
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Skeletons Speak Louder than Text: A Motion-Aware Pretraining Paradigm for Video-Based Person Re-Identification
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Rifen Lin, Alex Jinpeng Wang, Jiawei Mo, Min Li
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机视觉中的行人重识别任务，特别是基于视频的运动分析。虽然提到了预训练范式，但其核心是视觉模态（骨架运动）的处理，与推荐系统、搜索或广告的技术焦点没有直接关联。该工作属于纯粹的视觉研究范畴，没有展示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:59:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13150v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13150v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multimodal pretraining has revolutionized visual understanding, but its impact on video-based person re-identification (ReID) remains underexplored. Existing approaches often rely on video-text pairs, yet suffer from two fundamental limitations: (1) lack of genuine multimodal pretraining, and (2) text poorly captures fine-grained temporal motion-an essential cue for distinguishing identities in video. In this work, we take a bold departure from text-based paradigms by introducing the first skeleton-driven pretraining framework for ReID. To achieve this, we propose Contrastive Skeleton-Image Pretraining for ReID (CSIP-ReID), a novel two-stage method that leverages skeleton sequences as a spatiotemporally informative modality aligned with video frames. In the first stage, we employ contrastive learning to align skeleton and visual features at sequence level. In the second stage, we introduce a dynamic Prototype Fusion Updater (PFU) to refine multimodal identity prototypes, fusing motion and appearance cues. Moreover, we propose a Skeleton Guided Temporal Modeling (SGTM) module that distills temporal cues from skeleton data and integrates them into visual features. Extensive experiments demonstrate that CSIP-ReID achieves new state-of-the-art results on standard video ReID benchmarks (MARS, LS-VID, iLIDS-VID). Moreover, it exhibits strong generalization to skeleton-only ReID tasks (BIWI, IAS), significantly outperforming previous methods. CSIP-ReID pioneers an annotation-free and motion-aware pretraining paradigm for ReID, opening a new frontier in multimodal representation learning.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13145v1" target="_blank" rel="noopener noreferrer">
                基于视觉Transformer和生成对抗网络的自动化道路病害检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Automated Road Distress Detection Using Vision Transformersand Generative Adversarial Networks
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Cesar Portocarrero Rodriguez, Laura Vandeweyen, Yosuke Yamamoto
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉在道路检测领域的应用，属于纯粹的视觉任务。虽然使用了Transformer架构，但应用场景与推荐系统、搜索或广告完全无关。该技术没有明显的潜力应用于RecSys/Search/Ads领域，因为道路病害检测是高度专业化的视觉任务，缺乏与用户行为建模、内容推荐或搜索排序的关联性。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:56:08
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13145v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13145v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The American Society of Civil Engineers has graded Americas infrastructure condition as a C, with the road system receiving a dismal D. Roads are vital to regional economic viability, yet their management, maintenance, and repair processes remain inefficient, relying on outdated manual or laser-based inspection methods that are both costly and time-consuming. With the increasing availability of real-time visual data from autonomous vehicles, there is an opportunity to apply computer vision (CV) methods for advanced road monitoring, providing insights to guide infrastructure rehabilitation efforts. This project explores the use of state-of-the-art CV techniques for road distress segmentation. It begins by evaluating synthetic data generated with Generative Adversarial Networks (GANs) to assess its usefulness for model training. The study then applies Convolutional Neural Networks (CNNs) for road distress segmentation and subsequently examines the transformer-based model MaskFormer. Results show that GAN-generated data improves model performance and that MaskFormer outperforms the CNN model in two metrics: mAP50 and IoU.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13138v1" target="_blank" rel="noopener noreferrer">
                WinMamba：用于3D目标检测的状态空间模型中多尺度移位窗口
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            WinMamba: Multi-Scale Shifted Windows in State Space Model for 3D Object Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Longhui Zheng, Qiming Xia, Xiaolu Chen, Zhaoliang Liu, Chenglu Wen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D目标检测，这是一个纯粹的计算机视觉任务，与推荐系统、搜索或广告没有直接关联。虽然提到了状态空间模型（可能涉及序列建模），但应用场景限定在3D视觉领域，没有展示任何在RecSys/Search/Ads中的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:46:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13138v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13138v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D object detection is critical for autonomous driving, yet it remains fundamentally challenging to simultaneously maximize computational efficiency and capture long-range spatial dependencies. We observed that Mamba-based models, with their linear state-space design, capture long-range dependencies at lower cost, offering a promising balance between efficiency and accuracy. However, existing methods rely on axis-aligned scanning within a fixed window, inevitably discarding spatial information. To address this problem, we propose WinMamba, a novel Mamba-based 3D feature-encoding backbone composed of stacked WinMamba blocks. To enhance the backbone with robust multi-scale representation, the WinMamba block incorporates a window-scale-adaptive module that compensates voxel features across varying resolutions during sampling. Meanwhile, to obtain rich contextual cues within the linear state space, we equip the WinMamba layer with a learnable positional encoding and a window-shift strategy. Extensive experiments on the KITTI and Waymo datasets demonstrate that WinMamba significantly outperforms the baseline. Ablation studies further validate the individual contributions of the WSF and AWF modules in improving detection accuracy. The code will be made publicly available.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13135v1" target="_blank" rel="noopener noreferrer">
                MedGEN-Bench：用于开放式多模态医学生成的上下文纠缠基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MedGEN-Bench: Contextually entangled benchmark for open-ended multimodal medical generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junjie Yang, Yuhao Yan, Gang Wu, Yuxuan Wang, Ruoyu Liang, Xinjie Jiang, Xiang W...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确聚焦于医学领域的多模态生成基准测试，属于医疗生物领域的特定应用。这直接落入不相关主题中的'Medical, Biology, Chemistry, Physics或其它领域特定应用'类别，与推荐系统、搜索或广告的核心技术进展完全无关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:41:56
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13135v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13135v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    As Vision-Language Models (VLMs) increasingly gain traction in medical applications, clinicians are progressively expecting AI systems not only to generate textual diagnoses but also to produce corresponding medical images that integrate seamlessly into authentic clinical workflows. Despite the growing interest, existing medical visual benchmarks present notable limitations. They often rely on ambiguous queries that lack sufficient relevance to image content, oversimplify complex diagnostic reasoning into closed-ended shortcuts, and adopt a text-centric evaluation paradigm that overlooks the importance of image generation capabilities. To address these challenges, we introduce \textsc{MedGEN-Bench}, a comprehensive multimodal benchmark designed to advance medical AI research. MedGEN-Bench comprises 6,422 expert-validated image-text pairs spanning six imaging modalities, 16 clinical tasks, and 28 subtasks. It is structured into three distinct formats: Visual Question Answering, Image Editing, and Contextual Multimodal Generation. What sets MedGEN-Bench apart is its focus on contextually intertwined instructions that necessitate sophisticated cross-modal reasoning and open-ended generative outputs, moving beyond the constraints of multiple-choice formats. To evaluate the performance of existing systems, we employ a novel three-tier assessment framework that integrates pixel-level metrics, semantic text analysis, and expert-guided clinical relevance scoring. Using this framework, we systematically assess 10 compositional frameworks, 3 unified models, and 5 VLMs.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13132v1" target="_blank" rel="noopener noreferrer">
                揭示视觉语言导航鲁棒性：基于室内光照的黑盒对抗攻击框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Shedding Light on VLN Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chenyang Li, Wenbing Tang, Yihao Huang, Sinong Simon Zhan, Ming Hu, Xiaojun Jia,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言导航（VLN）的对抗攻击，属于纯粹的视觉-语言交互研究领域。虽然涉及多模态建模，但其核心关注点（室内导航、对抗攻击、光照变化）与推荐系统、搜索或广告的技术需求没有直接关联，且不符合任何指定的关注领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:39:29
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13132v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13132v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Vision-and-Language Navigation (VLN) agents have made remarkable progress, but their robustness remains insufficiently studied. Existing adversarial evaluations often rely on perturbations that manifest as unusual textures rarely encountered in everyday indoor environments. Errors under such contrived conditions have limited practical relevance, as real-world agents are unlikely to encounter such artificial patterns. In this work, we focus on indoor lighting, an intrinsic yet largely overlooked scene attribute that strongly influences navigation. We propose Indoor Lighting-based Adversarial Attack (ILA), a black-box framework that manipulates global illumination to disrupt VLN agents. Motivated by typical household lighting usage, we design two attack modes: Static Indoor Lighting-based Attack (SILA), where the lighting intensity remains constant throughout an episode, and Dynamic Indoor Lighting-based Attack (DILA), where lights are switched on or off at critical moments to induce abrupt illumination changes. We evaluate ILA on two state-of-the-art VLN models across three navigation tasks. Results show that ILA significantly increases failure rates while reducing trajectory efficiency, revealing previously unrecognized vulnerabilities of VLN agents to realistic indoor lighting variations.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13127v1" target="_blank" rel="noopener noreferrer">
                VEIL：通过隐式语言视觉利用越狱文本到视频模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            VEIL: Jailbreaking Text-to-Video Models via Visual Exploitation from Implicit Language
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zonghao Ying, Moyang Chen, Nizhang Li, Zhiqiang Wang, Wenxin Zhang, Quanchen Zou...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到视频模型的越狱攻击和视觉安全漏洞，属于计算机视觉安全领域。这与推荐系统、搜索或广告的核心技术焦点完全无关，也不涉及任何可能应用于这些领域的LLM或Transformer架构进展。论文内容纯粹关注视觉模型的安全漏洞利用，属于明确的无关主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:31:43
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13127v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13127v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Jailbreak attacks can circumvent model safety guardrails and reveal critical blind spots. Prior attacks on text-to-video (T2V) models typically add adversarial perturbations to obviously unsafe prompts, which are often easy to detect and defend. In contrast, we show that benign-looking prompts containing rich, implicit cues can induce T2V models to generate semantically unsafe videos that both violate policy and preserve the original (blocked) intent. To realize this, we propose VEIL, a jailbreak framework that leverages T2V models' cross-modal associative patterns via a modular prompt design. Specifically, our prompts combine three components: neutral scene anchors, which provide the surface-level scene description extracted from the blocked intent to maintain plausibility; latent auditory triggers, textual descriptions of innocuous-sounding audio events (e.g., creaking, muffled noises) that exploit learned audio-visual co-occurrence priors to bias the model toward particular unsafe visual concepts; and stylistic modulators, cinematic directives (e.g., camera framing, atmosphere) that amplify and stabilize the latent trigger's effect. We formalize attack generation as a constrained optimization over the above modular prompt space and solve it with a guided search procedure that balances stealth and effectiveness. Extensive experiments over 7 T2V models demonstrate the efficacy of our attack, achieving a 23 percent improvement in average attack success rate in commercial models.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13115v1" target="_blank" rel="noopener noreferrer">
                一种具有旋转不变特征的轻量级3D异常检测方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Lightweight 3D Anomaly Detection Method with Rotationally Invariant Features
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hanzhe Liang, Jie Zhou, Can Gao, Bingyang Guo, Jinbao Wang, Linlin Shen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D异常检测，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术没有直接关联。旋转不变特征和3D数据处理在RecSys/Search/Ads领域缺乏明确的应用场景，无法建立与当前关注点的技术联系。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:16:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13115v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13115v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D anomaly detection (AD) is a crucial task in computer vision, aiming to identify anomalous points or regions from point cloud data. However, existing methods may encounter challenges when handling point clouds with changes in orientation and position because the resulting features may vary significantly. To address this problem, we propose a novel Rotationally Invariant Features (RIF) framework for 3D AD. Firstly, to remove the adverse effect of variations on point cloud data, we develop a Point Coordinate Mapping (PCM) technique, which maps each point into a rotationally invariant space to maintain consistency of representation. Then, to learn robust and discriminative features, we design a lightweight Convolutional Transform Feature Network (CTF-Net) to extract rotationally invariant features for the memory bank. To improve the ability of the feature extractor, we introduce the idea of transfer learning to pre-train the feature extractor with 3D data augmentation. Experimental results show that the proposed method achieves the advanced performance on the Anomaly-ShapeNet dataset, with an average P-AUROC improvement of 17.7\%, and also gains the best performance on the Real3D-AD dataset, with an average P-AUROC improvement of 1.6\%. The strong generalization ability of RIF has been verified by combining it with traditional feature extraction methods on anomaly detection tasks, demonstrating great potential for industrial applications.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13110v1" target="_blank" rel="noopener noreferrer">
                学习隐式神经退化表示用于无配对图像去雾
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Learning Implicit Neural Degradation Representation for Unpaired Image Dehazing
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shuaibin Fan, Senming Zhong, Wenchao Yan, Minglong Xue
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机视觉中的图像去雾任务，属于纯粹的图像处理领域。虽然标题中提到'学习表示'，但这与推荐系统、搜索或广告中的用户行为建模、内容理解或排序任务没有直接关联。该技术缺乏在RecSys/Search/Ads领域的明显应用潜力，属于被排除的'纯粹视觉'类别。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:07:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13110v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13110v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Image dehazing is an important task in the field of computer vision, aiming at restoring clear and detail-rich visual content from haze-affected images. However, when dealing with complex scenes, existing methods often struggle to strike a balance between fine-grained feature representation of inhomogeneous haze distribution and global consistency modeling. Furthermore, to better learn the common degenerate representation of haze in spatial variations, we propose an unsupervised dehaze method for implicit neural degradation representation. Firstly, inspired by the Kolmogorov-Arnold representation theorem, we propose a mechanism combining the channel-independent and channel-dependent mechanisms, which efficiently enhances the ability to learn from nonlinear dependencies. which in turn achieves good visual perception in complex scenes. Moreover, we design an implicit neural representation to model haze degradation as a continuous function to eliminate redundant information and the dependence on explicit feature extraction and physical models. To further learn the implicit representation of the haze features, we also designed a dense residual enhancement module from it to eliminate redundant information. This achieves high-quality image restoration. Experimental results show that our method achieves competitive dehaze performance on various public and real-world datasets. This project code will be available at https://github.com/Fan-pixel/NeDR-Dehaze.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13106v1" target="_blank" rel="noopener noreferrer">
                用于医学图像增强的低层级数据集蒸馏
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Low-Level Dataset Distillation for Medical Image Enhancement
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Fengzhi Xu, Ziyuan Yang, Mengyu Sun, Joey Tianyi Zhou, Yi Zhang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像领域的特定应用，属于明确的无关主题范畴。论文标题表明其涉及医学图像增强和数据集蒸馏技术，这些内容与推荐系统、搜索或广告领域没有任何直接或间接的技术关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:05:07
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13106v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13106v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Medical image enhancement is clinically valuable, but existing methods require large-scale datasets to learn complex pixel-level mappings. However, the substantial training and storage costs associated with these datasets hinder their practical deployment. While dataset distillation (DD) can alleviate these burdens, existing methods mainly target high-level tasks, where multiple samples share the same label. This many-to-one mapping allows distilled data to capture shared semantics and achieve information compression. In contrast, low-level tasks involve a many-to-many mapping that requires pixel-level fidelity, making low-level DD an underdetermined problem, as a small distilled dataset cannot fully constrain the dense pixel-level mappings. To address this, we propose the first low-level DD method for medical image enhancement. We first leverage anatomical similarities across patients to construct the shared anatomical prior based on a representative patient, which serves as the initialization for the distilled data of different patients. This prior is then personalized for each patient using a Structure-Preserving Personalized Generation (SPG) module, which integrates patient-specific anatomical information into the distilled dataset while preserving pixel-level fidelity. For different low-level tasks, the distilled data is used to construct task-specific high- and low-quality training pairs. Patient-specific knowledge is injected into the distilled data by aligning the gradients computed from networks trained on the distilled pairs with those from the corresponding patient's raw data. Notably, downstream users cannot access raw patient data. Instead, only a distilled dataset containing abstract training information is shared, which excludes patient-specific details and thus preserves privacy.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13105v1" target="_blank" rel="noopener noreferrer">
                PlugTrack：多感知运动分析用于多目标跟踪中的自适应融合
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PlugTrack: Multi-Perceptive Motion Analysis for Adaptive Fusion in Multi-Object Tracking
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Seungjae Kim, SeungJoon Lee, MyeongAh Cho
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域中的多目标跟踪技术，涉及运动分析和多模态融合。虽然标题提到'自适应融合'，但这纯粹是视觉跟踪中的技术，与推荐系统、搜索或广告领域没有任何直接或间接的联系，也不涉及LLM、Transformer架构或异构数据处理。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 08:03:11
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13105v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13105v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multi-object tracking (MOT) predominantly follows the tracking-by-detection paradigm, where Kalman filters serve as the standard motion predictor due to computational efficiency but inherently fail on non-linear motion patterns. Conversely, recent data-driven motion predictors capture complex non-linear dynamics but suffer from limited domain generalization and computational overhead. Through extensive analysis, we reveal that even in datasets dominated by non-linear motion, Kalman filter outperforms data-driven predictors in up to 34\% of cases, demonstrating that real-world tracking scenarios inherently involve both linear and non-linear patterns. To leverage this complementarity, we propose PlugTrack, a novel framework that adaptively fuses Kalman filter and data-driven motion predictors through multi-perceptive motion understanding. Our approach employs multi-perceptive motion analysis to generate adaptive blending factors. PlugTrack achieves significant performance gains on MOT17/MOT20 and state-of-the-art on DanceTrack without modifying existing motion predictors. To the best of our knowledge, PlugTrack is the first framework to bridge classical and modern motion prediction paradigms through adaptive fusion in MOT.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13102v1" target="_blank" rel="noopener noreferrer">
                CapeNext：重新思考并优化用于类别无关姿态估计的动态支持信息
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CapeNext: Rethinking and refining dynamic support information for category-agnostic pose estimation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yu Zhu, Dan Zeng, Shuiwang Li, Qijun Zhao, Qiaomu Shen, Bo Tang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的姿态估计技术，属于纯粹的视觉研究方向。姿态估计与推荐系统、搜索或广告的核心技术栈没有直接关联，也不涉及LLM、Transformer架构或异构数据处理等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:56:01
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13102v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13102v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent research in Category-Agnostic Pose Estimation (CAPE) has adopted fixed textual keypoint description as semantic prior for two-stage pose matching frameworks. While this paradigm enhances robustness and flexibility by disentangling the dependency of support images, our critical analysis reveals two inherent limitations of static joint embedding: (1) polysemy-induced cross-category ambiguity during the matching process(e.g., the concept "leg" exhibiting divergent visual manifestations across humans and furniture), and (2) insufficient discriminability for fine-grained intra-category variations (e.g., posture and fur discrepancies between a sleeping white cat and a standing black cat). To overcome these challenges, we propose a new framework that innovatively integrates hierarchical cross-modal interaction with dual-stream feature refinement, enhancing the joint embedding with both class-level and instance-specific cues from textual description and specific images. Experiments on the MP-100 dataset demonstrate that, regardless of the network backbone, CapeNext consistently outperforms state-of-the-art CAPE methods by a large margin.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13099v1" target="_blank" rel="noopener noreferrer">
                MergeSlide：全切片图像终身学习中的持续模型融合与任务到类别的提示对齐推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MergeSlide: Continual Model Merging and Task-to-Class Prompt-Aligned Inference for Lifelong Learning on Whole Slide Images
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Doanh C. Bui, Ba Hung Ngo, Hoai Luan Pham, Khang Nguyen, Maï K. Nguyen, Yasuhiko...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像（全切片图像）的终身学习，这属于医学/生物领域的特定应用，与推荐系统、搜索或广告无关。虽然提到了模型融合和提示对齐技术，但这些技术是在医学图像分析的背景下提出的，没有明确的RecSys/Search/Ads应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:51:18
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13099v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13099v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Lifelong learning on Whole Slide Images (WSIs) aims to train or fine-tune a unified model sequentially on cancer-related tasks, reducing the resources and effort required for data transfer and processing, especially given the gigabyte-scale size of WSIs. In this paper, we introduce MergeSlide, a simple yet effective framework that treats lifelong learning as a model merging problem by leveraging a vision-language pathology foundation model. When a new task arrives, it is: 1) defined with class-aware prompts, 2) fine-tuned for a few epochs using an MLP-free backbone, and 3) merged into a unified model using an orthogonal continual merging strategy that preserves performance and mitigates catastrophic forgetting. For inference under the class-incremental learning (CLASS-IL) setting, where task identity is unknown, we introduce Task-to-Class Prompt-aligned (TCP) inference. Specifically, TCP first identifies the most relevant task using task-level prompts and then applies the corresponding class-aware prompts to generate predictions. To evaluate MergeSlide, we conduct experiments on a stream of six TCGA datasets. The results show that MergeSlide outperforms both rehearsal-based continual learning and vision-language zero-shot baselines. Code and data are available at https://github.com/caodoanh2001/MergeSlide.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13087v1" target="_blank" rel="noopener noreferrer">
                MEGA-GUI：用于GUI元素的多阶段增强接地代理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MEGA-GUI: Multi-stage Enhanced Grounding Agents for GUI Elements
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>SeokJoo Kwak, Jihoon Kim, Boyoun Kim, Jung Jae Yoon, Wooseok Jang, Jeonghoon Hon...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于图形用户界面(GUI)元素的接地代理技术，属于计算机视觉和人机交互领域。虽然涉及多阶段增强处理，但GUI元素识别与推荐系统、搜索或广告的核心技术栈没有直接关联，也无法看出在Transformer架构效率或LLM应用方面的潜在价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:38:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13087v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13087v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Graphical User Interface (GUI) grounding - the task of mapping natural language instructions to screen coordinates - is essential for autonomous agents and accessibility technologies. Existing systems rely on monolithic models or one-shot pipelines that lack modularity and fail under visual clutter and ambiguous instructions. We introduce MEGA-GUI, a multi-stage framework that separates grounding into coarse Region-of-Interest (ROI) selection and fine-grained element grounding, orchestrated by specialized vision-language agents. MEGA-GUI features a bidirectional ROI zoom algorithm that mitigates spatial dilution and a context-aware rewriting agent that reduces semantic ambiguity. Our analysis reveals complementary strengths and weaknesses across vision-language models at different visual scales, and we show that leveraging this modular structure achieves consistently higher accuracy than monolithic approaches. On the visually dense ScreenSpot-Pro benchmark, MEGA-GUI attains 73.18% accuracy, and on the semantically complex OSWorld-G benchmark it reaches 68.63%, surpassing previously reported results. Code and the Grounding Benchmark Toolkit (GBT) are available at https://github.com/samsungsds-research-papers/mega-gui.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13082v1" target="_blank" rel="noopener noreferrer">
                使用形变感知图神经网络实时预测乳腺癌部位
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Real-time prediction of breast cancer sites using deformation-aware graph neural network
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kyunghyun Lee, Yong-Min Shin, Minwoo Shin, Jihun Kim, Sunghwan Lim, Won-Yong Shi...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于乳腺癌预测这一医学领域应用，属于明确的医疗领域特定应用。标题中提到的图神经网络技术虽然具有通用性，但论文的应用场景完全限定在医疗诊断领域，与搜索、推荐、广告等商业系统没有任何关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:32:28
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13082v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13082v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Early diagnosis of breast cancer is crucial, enabling the establishment of appropriate treatment plans and markedly enhancing patient prognosis. While direct magnetic resonance imaging-guided biopsy demonstrates promising performance in detecting cancer lesions, its practical application is limited by prolonged procedure times and high costs. To overcome these issues, an indirect MRI-guided biopsy that allows the procedure to be performed outside of the MRI room has been proposed, but it still faces challenges in creating an accurate real-time deformable breast model. In our study, we tackled this issue by developing a graph neural network (GNN)-based model capable of accurately predicting deformed breast cancer sites in real time during biopsy procedures. An individual-specific finite element (FE) model was developed by incorporating magnetic resonance (MR) image-derived structural information of the breast and tumor to simulate deformation behaviors. A GNN model was then employed, designed to process surface displacement and distance-based graph data, enabling accurate prediction of overall tissue displacement, including the deformation of the tumor region. The model was validated using phantom and real patient datasets, achieving an accuracy within 0.2 millimeters (mm) for cancer node displacement (RMSE) and a dice similarity coefficient (DSC) of 0.977 for spatial overlap with actual cancerous regions. Additionally, the model enabled real-time inference and achieved a speed-up of over 4,000 times in computational cost compared to conventional FE simulations. The proposed deformation-aware GNN model offers a promising solution for real-time tumor displacement prediction in breast biopsy, with high accuracy and real-time capability. Its integration with clinical procedures could significantly enhance the precision and efficiency of breast cancer diagnosis.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13079v1" target="_blank" rel="noopener noreferrer">
                解耦场景感知与自车状态：一种用于增强端到端自动驾驶泛化能力的多上下文融合方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Decoupling Scene Perception and Ego Status: A Multi-Context Fusion Approach for Enhanced Generalization in End-to-End Autonomous Driving
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiacheng Tang, Mingyue Feng, Jiachao Liu, Yaonong Wang, Jian Pu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域，涉及场景感知、车辆状态和端到端驾驶系统。这些主题与搜索、推荐或广告系统没有直接关联，也不涉及LLM技术、Transformer架构进展或异构数据统一建模。自动驾驶是一个完全不同的应用领域，超出了当前关注的技术范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:27:55
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13079v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13079v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Modular design of planning-oriented autonomous driving has markedly advanced end-to-end systems. However, existing architectures remain constrained by an over-reliance on ego status, hindering generalization and robust scene understanding. We identify the root cause as an inherent design within these architectures that allows ego status to be easily leveraged as a shortcut. Specifically, the premature fusion of ego status in the upstream BEV encoder allows an information flow from this strong prior to dominate the downstream planning module. To address this challenge, we propose AdaptiveAD, an architectural-level solution based on a multi-context fusion strategy. Its core is a dual-branch structure that explicitly decouples scene perception and ego status. One branch performs scene-driven reasoning based on multi-task learning, but with ego status deliberately omitted from the BEV encoder, while the other conducts ego-driven reasoning based solely on the planning task. A scene-aware fusion module then adaptively integrates the complementary decisions from the two branches to form the final planning trajectory. To ensure this decoupling does not compromise multi-task learning, we introduce a path attention mechanism for ego-BEV interaction and add two targeted auxiliary tasks: BEV unidirectional distillation and autoregressive online mapping. Extensive evaluations on the nuScenes dataset demonstrate that AdaptiveAD achieves state-of-the-art open-loop planning performance. Crucially, it significantly mitigates the over-reliance on ego status and exhibits impressive generalization capabilities across diverse scenarios.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13065v1" target="_blank" rel="noopener noreferrer">
                RobustGait：基于外观的步态识别的鲁棒性分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            RobustGait: Robustness Analysis for Appearance Based Gait Recognition
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Reeshoon Sayera, Akash Kumar, Sirshapan Mitra, Prudvi Kamtam, Yogesh S Rawat
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的步态识别技术，属于生物特征识别范畴。虽然涉及模式识别，但与推荐系统、搜索或广告的核心技术领域没有直接关联，也不涉及LLM、Transformer架构或异构数据处理等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:12:06
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13065v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13065v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Appearance-based gait recognition have achieved strong performance on controlled datasets, yet systematic evaluation of its robustness to real-world corruptions and silhouette variability remains lacking. We present RobustGait, a framework for fine-grained robustness evaluation of appearance-based gait recognition systems. RobustGait evaluation spans four dimensions: the type of perturbation (digital, environmental, temporal, occlusion), the silhouette extraction method (segmentation and parsing networks), the architectural capacities of gait recognition models, and various deployment scenarios. The benchmark introduces 15 corruption types at 5 severity levels across CASIA-B, CCPG, and SUSTech1K, with in-the-wild validation on MEVID, and evaluates six state-of-the-art gait systems. We came across several exciting insights. First, applying noise at the RGB level better reflects real-world degradation, and reveal how distortions propagate through silhouette extraction to the downstream gait recognition systems. Second, gait accuracy is highly sensitive to silhouette extractor biases, revealing an overlooked source of benchmark bias. Third, robustness is dependent on both the type of perturbation and the architectural design. Finally, we explore robustness-enhancing strategies, showing that noise-aware training and knowledge distillation improve performance and move toward deployment-ready systems.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.13055v1" target="_blank" rel="noopener noreferrer">
                基于结构不确定性感知网络与曲线点查询的单目3D车道线检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Monocular 3D Lane Detection via Structure Uncertainty-Aware Network with Curve-Point Queries
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ruixin Liu, Zejian Yuan
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的3D车道线检测，属于纯粹的视觉感知任务。虽然标题提到了不确定性感知和查询机制，但这些技术主要应用于自动驾驶场景，与推荐系统、搜索或广告的排序和建模需求没有直接关联。该工作缺乏在异构数据处理或推荐相关应用中的潜在价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-17 07:01:10
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.13055v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.13055v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Monocular 3D lane detection is challenged by aleatoric uncertainty arising from inherent observation noise. Existing methods rely on simplified geometric assumptions, such as independent point predictions or global planar modeling, failing to capture structural variations and aleatoric uncertainty in real-world scenarios. In this paper, we propose MonoUnc, a bird's-eye view (BEV)-free 3D lane detector that explicitly models aleatoric uncertainty informed by local lane structures. Specifically, 3D lanes are projected onto the front-view (FV) space and approximated by parametric curves. Guided by curve predictions, curve-point query embeddings are dynamically generated for lane point predictions in 3D space. Each segment formed by two adjacent points is modeled as a 3D Gaussian, parameterized by the local structure and uncertainty estimations. Accordingly, a novel 3D Gaussian matching loss is designed to constrain these parameters jointly. Experiments on the ONCE-3DLanes and OpenLane datasets demonstrate that MonoUnc outperforms previous state-of-the-art (SoTA) methods across all benchmarks under stricter evaluation criteria. Additionally, we propose two comprehensive evaluation metrics for ONCE-3DLanes, calculating the average and maximum bidirectional Chamfer distances to quantify global and local errors. Codes are released at https://github.com/lrx02/MonoUnc.
                </div>
            </details>
    </div>
</div>
        </div>
    </main>

    <!-- 加载论文数据和JavaScript逻辑 -->
    <script src="static/app.js"></script>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // 在精选论文和普通论文之间添加展开/折叠按钮
            const papersContainer = document.querySelector('#papers-container');
            if (papersContainer) {
                // 添加展开/折叠全部按钮
                const expandAllButton = document.createElement('div');
                expandAllButton.className = 'expand-toggle';
                expandAllButton.textContent = '展开/折叠全部非精选论文';
                expandAllButton.addEventListener('click', function() {
                    papersContainer.classList.toggle('expanded-all');
                    this.textContent = papersContainer.classList.contains('expanded-all') ? 
                        '收起全部非精选论文' : '展开全部非精选论文';
                    
                    // 更新所有论文标题前的图标状态
                    const collapsedPapers = papersContainer.querySelectorAll('.collapsed-level-1');
                    collapsedPapers.forEach(paper => {
                        const iconElement = paper.querySelector('.expand-icon');
                        if (iconElement) {
                            iconElement.className = papersContainer.classList.contains('expanded-all') ? 
                                'expand-icon fa fa-eye' : 'expand-icon fa fa-eye-slash';
                        }
                    });
                });
                
                // 找到第一个非精选论文的位置
                const firstNormalPaper = papersContainer.querySelector('.simple-paper-card');
                if (firstNormalPaper) {
                    papersContainer.insertBefore(expandAllButton, firstNormalPaper);
                }
                
                // 添加分割线用于展开分数<=1的论文
                const divider = document.createElement('div');
                divider.className = 'papers-divider';
                
                const dividerLabel = document.createElement('div');
                dividerLabel.className = 'papers-divider-label';
                dividerLabel.textContent = '点击展开更多论文（评分较低）';
                dividerLabel.addEventListener('click', function() {
                    papersContainer.classList.toggle('expanded-level-2');
                    this.textContent = papersContainer.classList.contains('expanded-level-2') ? 
                        '点击收起低分论文' : '点击展开更多论文（评分较低）';
                });
                
                divider.appendChild(dividerLabel);
                
                // 在所有非精选论文的最后一个元素后面添加分割线
                const normalPapers = papersContainer.querySelectorAll('.simple-paper-card');
                if (normalPapers.length > 0) {
                    const lastNormalPaper = normalPapers[normalPapers.length - 1];
                    papersContainer.insertBefore(divider, lastNormalPaper.nextSibling);
                }
            }
            
            // 为每个非精选论文添加点击标题展开/折叠详情的功能
            const collapsedPapers = document.querySelectorAll('.collapsed-level-1');
            collapsedPapers.forEach(paper => {
                const titleElement = paper.querySelector('h3');
                if (titleElement) {
                    titleElement.style.cursor = 'pointer';
                    
                    // 创建展开/折叠图标元素并设置样式
                    const iconElement = document.createElement('i');
                    iconElement.className = 'expand-icon fa fa-eye-slash cursor-pointer';
                    iconElement.style.marginRight = '8px';
                    
                    // 将图标插入到标题链接之前，作为同级元素
                    const linkElement = titleElement.querySelector('a');
                    if (linkElement) {
                        // 将图标直接添加到标题元素中，位于链接之前
                        titleElement.insertBefore(iconElement, linkElement);
                        
                        // 为图标单独添加点击事件处理展开/折叠
                        iconElement.addEventListener('click', function(e) {
                            e.stopPropagation(); // 阻止事件冒泡到标题元素
                            const details = paper.querySelector('.paper-details');
                            if (details) {
                                const isExpanded = details.style.display === 'block';
                                details.style.display = isExpanded ? 'none' : 'block';
                                
                                // 更新图标状态
                                this.className = isExpanded ? 
                                    'expand-icon fa fa-eye-slash cursor-pointer' : 'expand-icon fa fa-eye cursor-pointer';
                                this.style.marginRight = '8px';
                            }
                        });
                    }
                    
                    // 为标题元素添加点击事件，也可以展开/折叠，但会检查点击目标
                    titleElement.addEventListener('click', function(e) {
                        // 仅当点击的是标题本身（非链接、非图标）时才展开/折叠
                        if (!e.target.closest('a') && !e.target.closest('.expand-icon')) {
                            const details = paper.querySelector('.paper-details');
                            if (details) {
                                const isExpanded = details.style.display === 'block';
                                details.style.display = isExpanded ? 'none' : 'block';
                                
                                // 更新图标状态
                                const iconElement = this.querySelector('.expand-icon');
                                if (iconElement) {
                                    iconElement.className = isExpanded ? 
                                        'expand-icon fa fa-eye-slash cursor-pointer' : 'expand-icon fa fa-eye cursor-pointer';
                                    iconElement.style.marginRight = '8px';
                                }
                            }
                        }
                    });
                }
            });
            
            // 实现"仅显示精选"按钮功能
            const showSelectedButton = document.getElementById('show-selected');
            if (showSelectedButton) {
                showSelectedButton.addEventListener('click', function() {
                    // 显示所有精选论文，隐藏所有普通论文
                    const selectedPapers = document.querySelectorAll('.paper-card');
                    const normalPapers = document.querySelectorAll('.simple-paper-card');
                    
                    selectedPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });
                    
                    normalPapers.forEach(paper => {
                        paper.style.display = 'none';
                    });
                    
                    // 更新显示计数
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${selectedPapers.length} 篇论文 (共 ${selectedPapers.length + normalPapers.length} 篇)`;
                    }
                    
                    // 更新按钮样式
                    this.className = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
                    document.getElementById('show-all').className = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
                    
                    // 隐藏展开/折叠按钮和分割线
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) expandToggle.style.display = 'none';
                    
                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'none';
                });
            }
            
            // 实现"全部论文"按钮功能
            const showAllButton = document.getElementById('show-all');
            if (showAllButton) {
                showAllButton.addEventListener('click', function() {
                    // 显示所有论文
                    const allPapers = document.querySelectorAll('.paper-card, .simple-paper-card');
                    allPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });
                    
                    // 重置折叠状态
                    papersContainer.classList.remove('expanded-all');
                    
                    // 更新显示计数
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${allPapers.length} 篇论文 (共 ${allPapers.length} 篇)`;
                    }
                    
                    // 更新按钮样式
                    this.className = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
                    document.getElementById('show-selected').className = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
                    
                    // 重新显示展开/折叠按钮和分割线
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) {
                        expandToggle.style.display = 'block';
                        expandToggle.textContent = '展开全部非精选论文';
                    }
                    
                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'block';
                });
            }
        });
    </script>
    <script>
    
    // 初始化日历
    document.addEventListener('DOMContentLoaded', () => {
        try {
            console.log('Attempting to initialize calendar...');
            initCalendar();
        } catch (error) {
            console.error('Error initializing calendar:', error);
        }
    });
    
    // 日历初始化函数
    function initCalendar() {
        const toggleBtn = document.getElementById('date-picker-toggle');
        const datePicker = document.getElementById('date-picker');
        const calendarGrid = document.getElementById('calendar-grid');
        const prevMonthBtn = document.getElementById('prev-month');
        const nextMonthBtn = document.getElementById('next-month');
        const currentMonthEl = document.getElementById('current-month');
        const selectedDateText = document.getElementById('selected-date-text');
        
        // 当前显示的日期（从页面获取）
        const currentDateStr = document.getElementById('current-date').textContent.trim().replace(/^\d+年|月|日/g, '');
        const currentDate = new Date(currentDateStr);
        let displayYear = currentDate.getFullYear();
        let displayMonth = currentDate.getMonth();
        
        // 有论文数据的日期列表
        const availableDates = ["20251105","20251107","20251009","20251113","20251030","20251111","20251031","20251017","20251021","20251010","20251024","20251022","20251029","20251114","20251118","20251016","20251015","20251028","20251014","20251112","20251106","20251023"];
        
        // 尝试从localStorage恢复选择状态
        const savedDate = localStorage.getItem('selectedDate');
        const savedYear = localStorage.getItem('selectedYear');
        const savedMonth = localStorage.getItem('selectedMonth');
        
        // 确保页面加载时显示当前选中的日期
        // 修复持久化问题：确保每次加载都能正确恢复选中状态
        if (savedDate) {
            selectedDateText.textContent = savedDate;
            if (savedYear) displayYear = parseInt(savedYear);
            if (savedMonth) displayMonth = parseInt(savedMonth);
        } else {
            // 首次加载时，将当前页面日期保存到localStorage
            const currentPageDate = currentDateStr.replace(/\//g, '-');
            selectedDateText.textContent = currentPageDate;
            localStorage.setItem('selectedDate', currentPageDate);
            localStorage.setItem('selectedYear', currentDate.getFullYear().toString());
            localStorage.setItem('selectedMonth', currentDate.getMonth().toString());
        }
    
        // 切换日历显示状态
        toggleBtn.addEventListener('click', (e) => {
            e.stopPropagation();
            
            // 显式控制hidden类的添加和移除
            if (datePicker.classList.contains('hidden')) {
                // 显示日历 - 确保移除hidden类
                datePicker.classList.remove('hidden');
                renderCalendar();
            } else {
                // 隐藏日历
                datePicker.classList.add('hidden');
            }
        });
        
        // 点击其他区域关闭日历
        document.addEventListener('click', () => {
            if (!datePicker.classList.contains('hidden')) {
                datePicker.classList.add('hidden');
            }
        });
        
        // 阻止日历内部点击事件冒泡
        datePicker.addEventListener('click', (e) => {
            e.stopPropagation();
        });
        
        // 上月和下月按钮
        prevMonthBtn.addEventListener('click', () => {
            displayMonth--;
            if (displayMonth < 0) {
                displayMonth = 11;
                displayYear--;
            }
            renderCalendar();
        });
        
        nextMonthBtn.addEventListener('click', () => {
            displayMonth++;
            if (displayMonth > 11) {
                displayMonth = 0;
                displayYear++;
            }
            renderCalendar();
        });
        
        /**
         * 渲染日历
         */
        function renderCalendar() {
            // 清空日历网格
            calendarGrid.innerHTML = '';
            
            // 更新当前月份显示
            const monthNames = ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月'];
            currentMonthEl.textContent = displayYear + '年' + monthNames[displayMonth];
            
            // 计算当前月份的第一天是星期几
            const firstDay = new Date(displayYear, displayMonth, 1);
            const firstDayOfWeek = firstDay.getDay();
            
            // 计算当前月份的天数
            const daysInMonth = new Date(displayYear, displayMonth + 1, 0).getDate();
            
            // 添加上月的占位天数
            for (let i = 0; i < firstDayOfWeek; i++) {
                const emptyDay = document.createElement('div');
                emptyDay.classList.add('py-1', 'text-gray-300');
                calendarGrid.appendChild(emptyDay);
            }
            
            // 获取当前日期（用于高亮显示）
            const today = new Date();
            today.setHours(0, 0, 0, 0);
            
            // 添加当前月份的天数
            for (let day = 1; day <= daysInMonth; day++) {
                const dayElement = document.createElement('div');
                const currentDateObj = new Date(displayYear, displayMonth, day);
                const dateStr = displayYear + String(displayMonth + 1).padStart(2, '0') + String(day).padStart(2, '0');
                const displayDateStr = displayYear + '-' + String(displayMonth + 1).padStart(2, '0') + '-' + String(day).padStart(2, '0');
                
                // 设置日期元素基本样式
                dayElement.textContent = day;
                
                // 检查该日期是否有论文数据
                const hasPapers = availableDates.includes(dateStr);
                
                if (hasPapers) {
                    // 有论文数据的日期样式
                    dayElement.classList.add('py-1', 'cursor-pointer', 'hover:bg-gray-100', 'rounded', 'bg-blue-50', 'font-medium');
                    
                    // 添加点击事件，跳转到对应日期的页面
                    dayElement.addEventListener('click', () => {
                        console.log('Date clicked:', displayDateStr);
                        selectedDateText.textContent = displayDateStr;
                        
                        // 保存选择状态到localStorage
                        localStorage.setItem('selectedDate', displayDateStr);
                        localStorage.setItem('selectedYear', displayYear.toString());
                        localStorage.setItem('selectedMonth', displayMonth.toString());
                        
                        datePicker.classList.add('hidden');
                        
                        // 构造目标URL并跳转
                        const targetUrl = 'arxiv_' + dateStr + '.html';
                        window.location.href = targetUrl;
                    });
                } else {
                    // 没有论文数据的日期样式（置灰不可点击）
                    dayElement.classList.add('py-1', 'text-gray-400', 'cursor-not-allowed');
                }
                
                // 高亮显示当天日期（覆盖之前的样式）
                if (currentDateObj.getTime() === today.getTime()) {
                    dayElement.classList.remove('bg-blue-50');
                    dayElement.classList.add('bg-primary', 'text-white', 'font-bold', 'shadow');
                    if (!hasPapers) {
                        // 当天没有论文时，仍然置灰但保持背景色
                        dayElement.classList.add('opacity-70');
                    }
                }
                
                // 高亮显示当前选中的日期
                if (displayDateStr === selectedDateText.textContent) {
                    dayElement.classList.add('font-bold', 'border-2', 'border-primary', 'rounded-lg', 'shadow-md');
                }
                
                // 增强有论文数据的日期样式，使其更明显
                if (hasPapers && currentDateObj.getTime() !== today.getTime()) {
                    dayElement.classList.add('bg-blue-100', 'hover:bg-blue-200', 'transition-colors', 'duration-200');
                }
                
                calendarGrid.appendChild(dayElement);
            }
        }
    }
    </script>
    </body>

</html>